diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index b5eea138732a5..c54afa1e6e72e 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -88,21 +88,30 @@ class RISCVTuneProcessorModel, GenericTuneInfo; def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64", - NoSchedModel, + GenericModel, [Feature64Bit, FeatureStdExtI], GenericTuneFeatures>, GenericTuneInfo; // Support generic for compatibility with other targets. The triple will be used // to change to the appropriate rv32/rv64 version. -def GENERIC : RISCVTuneProcessorModel<"generic", NoSchedModel>, GenericTuneInfo; +def GENERIC : RISCVTuneProcessorModel<"generic", GenericModel>, GenericTuneInfo; def MIPS_P8700 : RISCVProcessorModel<"mips-p8700", MIPSP8700Model, @@ -496,7 +505,7 @@ def TENSTORRENT_ASCALON_D8 : RISCVProcessorModel<"tt-ascalon-d8", TunePostRAScheduler]>; def VENTANA_VEYRON_V1 : RISCVProcessorModel<"veyron-v1", - NoSchedModel, + GenericModel, [Feature64Bit, FeatureStdExtI, FeatureStdExtZifencei, @@ -556,7 +565,7 @@ def XIANGSHAN_NANHU : RISCVProcessorModel<"xiangshan-nanhu", TuneShiftedZExtWFusion]>; def SPACEMIT_X60 : RISCVProcessorModel<"spacemit-x60", - NoSchedModel, + GenericModel, !listconcat(RVA22S64Features, [FeatureStdExtV, FeatureStdExtSscofpmf, @@ -581,7 +590,7 @@ def SPACEMIT_X60 : RISCVProcessorModel<"spacemit-x60", } def RP2350_HAZARD3 : RISCVProcessorModel<"rp2350-hazard3", - NoSchedModel, + GenericModel, [Feature32Bit, FeatureStdExtI, FeatureStdExtM, diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/add-imm.ll b/llvm/test/CodeGen/RISCV/GlobalISel/add-imm.ll index 0fd23a7d346df..1b96189aaea5c 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/add-imm.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/add-imm.ll @@ -212,30 +212,30 @@ define i64 @add64_accept(i64 %a) nounwind { define void @add32_reject() nounwind { ; RV32I-LABEL: add32_reject: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a0, %hi(ga) -; RV32I-NEXT: lui a1, %hi(gb) -; RV32I-NEXT: lw a2, %lo(ga)(a0) -; RV32I-NEXT: lw a3, %lo(gb)(a1) -; RV32I-NEXT: lui a4, 1 -; RV32I-NEXT: addi a4, a4, -1096 -; RV32I-NEXT: add a2, a2, a4 -; RV32I-NEXT: add a3, a3, a4 -; RV32I-NEXT: sw a2, %lo(ga)(a0) -; RV32I-NEXT: sw a3, %lo(gb)(a1) +; RV32I-NEXT: lui a0, 1 +; RV32I-NEXT: lui a1, %hi(ga) +; RV32I-NEXT: lui a2, %hi(gb) +; RV32I-NEXT: lw a3, %lo(ga)(a1) +; RV32I-NEXT: lw a4, %lo(gb)(a2) +; RV32I-NEXT: addi a0, a0, -1096 +; RV32I-NEXT: add a3, a3, a0 +; RV32I-NEXT: add a0, a4, a0 +; RV32I-NEXT: sw a3, %lo(ga)(a1) +; RV32I-NEXT: sw a0, %lo(gb)(a2) ; RV32I-NEXT: ret ; ; RV64I-LABEL: add32_reject: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a0, %hi(ga) -; RV64I-NEXT: lui a1, %hi(gb) -; RV64I-NEXT: lw a2, %lo(ga)(a0) -; RV64I-NEXT: lw a3, %lo(gb)(a1) -; RV64I-NEXT: lui a4, 1 -; RV64I-NEXT: addi a4, a4, -1096 -; RV64I-NEXT: add a2, a2, a4 -; RV64I-NEXT: add a3, a3, a4 -; RV64I-NEXT: sw a2, %lo(ga)(a0) -; RV64I-NEXT: sw a3, %lo(gb)(a1) +; RV64I-NEXT: lui a0, 1 +; RV64I-NEXT: lui a1, %hi(ga) +; RV64I-NEXT: lui a2, %hi(gb) +; RV64I-NEXT: lw a3, %lo(ga)(a1) +; RV64I-NEXT: lw a4, %lo(gb)(a2) +; RV64I-NEXT: addi a0, a0, -1096 +; RV64I-NEXT: add a3, a3, a0 +; RV64I-NEXT: add a0, a4, a0 +; RV64I-NEXT: sw a3, %lo(ga)(a1) +; RV64I-NEXT: sw a0, %lo(gb)(a2) ; RV64I-NEXT: ret %1 = load i32, ptr @ga, align 4 %2 = load i32, ptr @gb, align 4 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll b/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll index 3a55189076dee..5b9f0e60e7d80 100644 --- 
a/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll @@ -93,49 +93,49 @@ define i32 @expanded_neg_abs32_unsigned(i32 %x) { define i64 @expanded_neg_abs64(i64 %x) { ; RV32I-LABEL: expanded_neg_abs64: ; RV32I: # %bb.0: -; RV32I-NEXT: snez a2, a0 -; RV32I-NEXT: neg a3, a1 -; RV32I-NEXT: sub a2, a3, a2 -; RV32I-NEXT: neg a3, a0 -; RV32I-NEXT: beq a2, a1, .LBB2_2 +; RV32I-NEXT: neg a2, a0 +; RV32I-NEXT: snez a3, a0 +; RV32I-NEXT: neg a4, a1 +; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: beq a3, a1, .LBB2_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a4, a1, a2 +; RV32I-NEXT: slt a4, a1, a3 ; RV32I-NEXT: beqz a4, .LBB2_3 ; RV32I-NEXT: j .LBB2_4 ; RV32I-NEXT: .LBB2_2: -; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: sltu a4, a0, a2 ; RV32I-NEXT: bnez a4, .LBB2_4 ; RV32I-NEXT: .LBB2_3: -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: .LBB2_4: -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a1, a3 -; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: snez a1, a2 +; RV32I-NEXT: neg a2, a3 ; RV32I-NEXT: sub a1, a2, a1 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: expanded_neg_abs64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: snez a2, a0 -; RV32ZBB-NEXT: neg a3, a1 -; RV32ZBB-NEXT: sub a2, a3, a2 -; RV32ZBB-NEXT: neg a3, a0 -; RV32ZBB-NEXT: beq a2, a1, .LBB2_2 +; RV32ZBB-NEXT: neg a2, a0 +; RV32ZBB-NEXT: snez a3, a0 +; RV32ZBB-NEXT: neg a4, a1 +; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: beq a3, a1, .LBB2_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt a4, a1, a2 +; RV32ZBB-NEXT: slt a4, a1, a3 ; RV32ZBB-NEXT: beqz a4, .LBB2_3 ; RV32ZBB-NEXT: j .LBB2_4 ; RV32ZBB-NEXT: .LBB2_2: -; RV32ZBB-NEXT: sltu a4, a0, a3 +; RV32ZBB-NEXT: sltu a4, a0, a2 ; RV32ZBB-NEXT: bnez a4, .LBB2_4 ; RV32ZBB-NEXT: .LBB2_3: -; RV32ZBB-NEXT: mv a3, a0 -; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: mv a2, a0 +; RV32ZBB-NEXT: mv a3, a1 ; RV32ZBB-NEXT: .LBB2_4: -; RV32ZBB-NEXT: neg a0, a3 -; RV32ZBB-NEXT: snez a1, a3 -; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: neg a0, a2 +; RV32ZBB-NEXT: snez a1, a2 +; RV32ZBB-NEXT: neg a2, a3 ; RV32ZBB-NEXT: sub a1, a2, a1 ; RV32ZBB-NEXT: ret ; @@ -163,49 +163,49 @@ define i64 @expanded_neg_abs64(i64 %x) { define i64 @expanded_neg_abs64_unsigned(i64 %x) { ; RV32I-LABEL: expanded_neg_abs64_unsigned: ; RV32I: # %bb.0: -; RV32I-NEXT: snez a2, a0 -; RV32I-NEXT: neg a3, a1 -; RV32I-NEXT: sub a2, a3, a2 -; RV32I-NEXT: neg a3, a0 -; RV32I-NEXT: beq a2, a1, .LBB3_2 +; RV32I-NEXT: neg a2, a0 +; RV32I-NEXT: snez a3, a0 +; RV32I-NEXT: neg a4, a1 +; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: beq a3, a1, .LBB3_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a4, a1, a2 +; RV32I-NEXT: sltu a4, a1, a3 ; RV32I-NEXT: beqz a4, .LBB3_3 ; RV32I-NEXT: j .LBB3_4 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: sltu a4, a0, a2 ; RV32I-NEXT: bnez a4, .LBB3_4 ; RV32I-NEXT: .LBB3_3: -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: .LBB3_4: -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a1, a3 -; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: snez a1, a2 +; RV32I-NEXT: neg a2, a3 ; RV32I-NEXT: sub a1, a2, a1 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: expanded_neg_abs64_unsigned: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: snez a2, a0 -; RV32ZBB-NEXT: neg a3, a1 -; RV32ZBB-NEXT: sub a2, a3, a2 -; RV32ZBB-NEXT: neg a3, a0 -; RV32ZBB-NEXT: beq a2, a1, .LBB3_2 +; RV32ZBB-NEXT: neg a2, a0 +; RV32ZBB-NEXT: snez a3, a0 +; 
RV32ZBB-NEXT: neg a4, a1 +; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: beq a3, a1, .LBB3_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu a4, a1, a2 +; RV32ZBB-NEXT: sltu a4, a1, a3 ; RV32ZBB-NEXT: beqz a4, .LBB3_3 ; RV32ZBB-NEXT: j .LBB3_4 ; RV32ZBB-NEXT: .LBB3_2: -; RV32ZBB-NEXT: sltu a4, a0, a3 +; RV32ZBB-NEXT: sltu a4, a0, a2 ; RV32ZBB-NEXT: bnez a4, .LBB3_4 ; RV32ZBB-NEXT: .LBB3_3: -; RV32ZBB-NEXT: mv a3, a0 -; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: mv a2, a0 +; RV32ZBB-NEXT: mv a3, a1 ; RV32ZBB-NEXT: .LBB3_4: -; RV32ZBB-NEXT: neg a0, a3 -; RV32ZBB-NEXT: snez a1, a3 -; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: neg a0, a2 +; RV32ZBB-NEXT: snez a1, a2 +; RV32ZBB-NEXT: neg a2, a3 ; RV32ZBB-NEXT: sub a1, a2, a1 ; RV32ZBB-NEXT: ret ; @@ -315,49 +315,49 @@ define i32 @expanded_neg_inv_abs32_unsigned(i32 %x) { define i64 @expanded_neg_inv_abs64(i64 %x) { ; RV32I-LABEL: expanded_neg_inv_abs64: ; RV32I: # %bb.0: -; RV32I-NEXT: snez a2, a0 -; RV32I-NEXT: neg a3, a1 -; RV32I-NEXT: sub a2, a3, a2 -; RV32I-NEXT: neg a3, a0 -; RV32I-NEXT: beq a2, a1, .LBB6_2 +; RV32I-NEXT: neg a2, a0 +; RV32I-NEXT: snez a3, a0 +; RV32I-NEXT: neg a4, a1 +; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: beq a3, a1, .LBB6_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a4, a2, a1 +; RV32I-NEXT: slt a4, a3, a1 ; RV32I-NEXT: beqz a4, .LBB6_3 ; RV32I-NEXT: j .LBB6_4 ; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: sltu a4, a3, a0 +; RV32I-NEXT: sltu a4, a2, a0 ; RV32I-NEXT: bnez a4, .LBB6_4 ; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: .LBB6_4: -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a1, a3 -; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: snez a1, a2 +; RV32I-NEXT: neg a2, a3 ; RV32I-NEXT: sub a1, a2, a1 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: expanded_neg_inv_abs64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: snez a2, a0 -; RV32ZBB-NEXT: neg a3, a1 -; RV32ZBB-NEXT: sub a2, a3, a2 -; RV32ZBB-NEXT: neg a3, a0 -; RV32ZBB-NEXT: beq a2, a1, .LBB6_2 +; RV32ZBB-NEXT: neg a2, a0 +; RV32ZBB-NEXT: snez a3, a0 +; RV32ZBB-NEXT: neg a4, a1 +; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: beq a3, a1, .LBB6_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt a4, a2, a1 +; RV32ZBB-NEXT: slt a4, a3, a1 ; RV32ZBB-NEXT: beqz a4, .LBB6_3 ; RV32ZBB-NEXT: j .LBB6_4 ; RV32ZBB-NEXT: .LBB6_2: -; RV32ZBB-NEXT: sltu a4, a3, a0 +; RV32ZBB-NEXT: sltu a4, a2, a0 ; RV32ZBB-NEXT: bnez a4, .LBB6_4 ; RV32ZBB-NEXT: .LBB6_3: -; RV32ZBB-NEXT: mv a3, a0 -; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: mv a2, a0 +; RV32ZBB-NEXT: mv a3, a1 ; RV32ZBB-NEXT: .LBB6_4: -; RV32ZBB-NEXT: neg a0, a3 -; RV32ZBB-NEXT: snez a1, a3 -; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: neg a0, a2 +; RV32ZBB-NEXT: snez a1, a2 +; RV32ZBB-NEXT: neg a2, a3 ; RV32ZBB-NEXT: sub a1, a2, a1 ; RV32ZBB-NEXT: ret ; @@ -385,49 +385,49 @@ define i64 @expanded_neg_inv_abs64(i64 %x) { define i64 @expanded_neg_inv_abs64_unsigned(i64 %x) { ; RV32I-LABEL: expanded_neg_inv_abs64_unsigned: ; RV32I: # %bb.0: -; RV32I-NEXT: snez a2, a0 -; RV32I-NEXT: neg a3, a1 -; RV32I-NEXT: sub a2, a3, a2 -; RV32I-NEXT: neg a3, a0 -; RV32I-NEXT: beq a2, a1, .LBB7_2 +; RV32I-NEXT: neg a2, a0 +; RV32I-NEXT: snez a3, a0 +; RV32I-NEXT: neg a4, a1 +; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: beq a3, a1, .LBB7_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a4, a2, a1 +; RV32I-NEXT: sltu a4, a3, a1 ; RV32I-NEXT: beqz a4, .LBB7_3 ; RV32I-NEXT: j .LBB7_4 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sltu a4, a3, a0 +; RV32I-NEXT: sltu a4, a2, a0 ; 
RV32I-NEXT: bnez a4, .LBB7_4 ; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: mv a3, a0 -; RV32I-NEXT: mv a2, a1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: .LBB7_4: -; RV32I-NEXT: neg a0, a3 -; RV32I-NEXT: snez a1, a3 -; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: neg a0, a2 +; RV32I-NEXT: snez a1, a2 +; RV32I-NEXT: neg a2, a3 ; RV32I-NEXT: sub a1, a2, a1 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: expanded_neg_inv_abs64_unsigned: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: snez a2, a0 -; RV32ZBB-NEXT: neg a3, a1 -; RV32ZBB-NEXT: sub a2, a3, a2 -; RV32ZBB-NEXT: neg a3, a0 -; RV32ZBB-NEXT: beq a2, a1, .LBB7_2 +; RV32ZBB-NEXT: neg a2, a0 +; RV32ZBB-NEXT: snez a3, a0 +; RV32ZBB-NEXT: neg a4, a1 +; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: beq a3, a1, .LBB7_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu a4, a2, a1 +; RV32ZBB-NEXT: sltu a4, a3, a1 ; RV32ZBB-NEXT: beqz a4, .LBB7_3 ; RV32ZBB-NEXT: j .LBB7_4 ; RV32ZBB-NEXT: .LBB7_2: -; RV32ZBB-NEXT: sltu a4, a3, a0 +; RV32ZBB-NEXT: sltu a4, a2, a0 ; RV32ZBB-NEXT: bnez a4, .LBB7_4 ; RV32ZBB-NEXT: .LBB7_3: -; RV32ZBB-NEXT: mv a3, a0 -; RV32ZBB-NEXT: mv a2, a1 +; RV32ZBB-NEXT: mv a2, a0 +; RV32ZBB-NEXT: mv a3, a1 ; RV32ZBB-NEXT: .LBB7_4: -; RV32ZBB-NEXT: neg a0, a3 -; RV32ZBB-NEXT: snez a1, a3 -; RV32ZBB-NEXT: neg a2, a2 +; RV32ZBB-NEXT: neg a0, a2 +; RV32ZBB-NEXT: snez a1, a2 +; RV32ZBB-NEXT: neg a2, a3 ; RV32ZBB-NEXT: sub a1, a2, a1 ; RV32ZBB-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll index cb2037f5fb027..28dde9a3472c2 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/double-arith.ll @@ -424,11 +424,11 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: mv s3, a3 ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: lui a1, %hi(.LCPI12_0) -; RV32I-NEXT: addi a1, a1, %lo(.LCPI12_0) -; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: lui a2, %hi(.LCPI12_0) +; RV32I-NEXT: addi a3, a2, %lo(.LCPI12_0) +; RV32I-NEXT: lw a2, 0(a3) +; RV32I-NEXT: lw a3, 4(a3) ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a5, 524288 @@ -454,9 +454,9 @@ define double @fmsub_d(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: lui a0, %hi(.LCPI12_0) -; RV64I-NEXT: ld a1, %lo(.LCPI12_0)(a0) ; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI12_0) +; RV64I-NEXT: ld a1, %lo(.LCPI12_0)(a1) ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a1, a1, 63 @@ -511,20 +511,20 @@ define double @fnmadd_d(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a3 ; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: mv s3, a5 ; RV32I-NEXT: lui a2, %hi(.LCPI13_0) ; RV32I-NEXT: addi a2, a2, %lo(.LCPI13_0) -; RV32I-NEXT: lw s3, 0(a2) -; RV32I-NEXT: lw s4, 4(a2) -; RV32I-NEXT: mv s5, a5 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: lw s4, 0(a2) +; RV32I-NEXT: lw s5, 4(a2) +; RV32I-NEXT: mv a2, s4 +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: mv s7, a1 ; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s5 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a2, s4 +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a5, 524288 @@ -556,14 +556,14 @@ define double 
@fnmadd_d(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: lui a1, %hi(.LCPI13_0) -; RV64I-NEXT: ld s1, %lo(.LCPI13_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: ld s2, %lo(.LCPI13_0)(a1) +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a2, a1, 63 @@ -625,20 +625,20 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: mv s3, a5 ; RV32I-NEXT: lui a2, %hi(.LCPI14_0) ; RV32I-NEXT: addi a2, a2, %lo(.LCPI14_0) -; RV32I-NEXT: lw s3, 0(a2) -; RV32I-NEXT: lw s4, 4(a2) -; RV32I-NEXT: mv s5, a5 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: lw s4, 0(a2) +; RV32I-NEXT: lw s5, 4(a2) +; RV32I-NEXT: mv a2, s4 +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: mv s7, a1 ; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s5 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a2, s4 +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: mv a4, a0 ; RV32I-NEXT: lui a5, 524288 @@ -670,14 +670,14 @@ define double @fnmadd_d_2(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: lui a1, %hi(.LCPI14_0) -; RV64I-NEXT: ld s1, %lo(.LCPI14_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: ld s2, %lo(.LCPI14_0)(a1) +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a2, a1, 63 @@ -799,11 +799,11 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a3 ; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: mv s3, a5 ; RV32I-NEXT: lui a2, %hi(.LCPI17_0) ; RV32I-NEXT: addi a3, a2, %lo(.LCPI17_0) ; RV32I-NEXT: lw a2, 0(a3) ; RV32I-NEXT: lw a3, 4(a3) -; RV32I-NEXT: mv s3, a5 ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: lui a2, 524288 ; RV32I-NEXT: xor a1, a1, a2 @@ -827,9 +827,9 @@ define double @fnmsub_d(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: lui a1, %hi(.LCPI17_0) ; RV64I-NEXT: ld a1, %lo(.LCPI17_0)(a1) -; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a1, a1, 63 @@ -880,11 +880,11 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: mv s3, a5 ; RV32I-NEXT: lui a2, %hi(.LCPI18_0) ; RV32I-NEXT: addi a3, a2, %lo(.LCPI18_0) ; RV32I-NEXT: lw a2, 0(a3) ; RV32I-NEXT: lw a3, 4(a3) -; RV32I-NEXT: mv s3, a5 ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: lui a3, 524288 @@ -910,9 +910,9 @@ define double @fnmsub_d_2(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; 
RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: lui a1, %hi(.LCPI18_0) ; RV64I-NEXT: ld a1, %lo(.LCPI18_0)(a1) -; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: li a1, -1 ; RV64I-NEXT: slli a1, a1, 63 @@ -1009,11 +1009,11 @@ define double @fmsub_d_contract(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: mv s2, a2 ; RV32I-NEXT: mv s3, a3 ; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: lui a1, %hi(.LCPI20_0) -; RV32I-NEXT: addi a1, a1, %lo(.LCPI20_0) -; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: lui a2, %hi(.LCPI20_0) +; RV32I-NEXT: addi a3, a2, %lo(.LCPI20_0) +; RV32I-NEXT: lw a2, 0(a3) +; RV32I-NEXT: lw a3, 4(a3) ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: mv s5, a1 @@ -1044,9 +1044,9 @@ define double @fmsub_d_contract(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: lui a0, %hi(.LCPI20_0) -; RV64I-NEXT: ld a1, %lo(.LCPI20_0)(a0) ; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI20_0) +; RV64I-NEXT: ld a1, %lo(.LCPI20_0)(a1) ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 @@ -1108,27 +1108,27 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a3 ; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: mv s3, a5 ; RV32I-NEXT: lui a2, %hi(.LCPI21_0) ; RV32I-NEXT: addi a2, a2, %lo(.LCPI21_0) -; RV32I-NEXT: lw s3, 0(a2) -; RV32I-NEXT: lw s4, 4(a2) -; RV32I-NEXT: mv s5, a5 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: lw s4, 0(a2) +; RV32I-NEXT: lw s5, 4(a2) +; RV32I-NEXT: mv a2, s4 +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: mv s7, a1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: mv a2, s4 +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: mv s1, a1 ; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s5 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: mv a1, s3 +; RV32I-NEXT: mv a2, s4 +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv s3, a1 @@ -1163,18 +1163,18 @@ define double @fnmadd_d_contract(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: lui a1, %hi(.LCPI21_0) -; RV64I-NEXT: ld s1, %lo(.LCPI21_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: ld s2, %lo(.LCPI21_0)(a1) +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s3 @@ -1237,20 +1237,20 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: mv s0, a2 ; RV32I-NEXT: mv s1, a3 ; RV32I-NEXT: mv s2, a4 +; RV32I-NEXT: mv s3, a5 ; RV32I-NEXT: lui a2, %hi(.LCPI22_0) ; RV32I-NEXT: addi a2, a2, %lo(.LCPI22_0) -; RV32I-NEXT: lw s3, 0(a2) -; RV32I-NEXT: lw s4, 4(a2) -; RV32I-NEXT: mv s5, a5 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: lw s4, 0(a2) +; RV32I-NEXT: lw s5, 4(a2) 
+; RV32I-NEXT: mv a2, s4 +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: mv s6, a0 ; RV32I-NEXT: mv s7, a1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: mv a1, s1 -; RV32I-NEXT: mv a2, s3 -; RV32I-NEXT: mv a3, s4 +; RV32I-NEXT: mv a2, s4 +; RV32I-NEXT: mv a3, s5 ; RV32I-NEXT: call __adddf3 ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: mv a3, a1 @@ -1260,7 +1260,7 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { ; RV32I-NEXT: mv a2, a0 ; RV32I-NEXT: mv a3, a1 ; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s5 +; RV32I-NEXT: mv a1, s3 ; RV32I-NEXT: call __subdf3 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload @@ -1283,20 +1283,20 @@ define double @fnmsub_d_contract(double %a, double %b, double %c) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: lui a1, %hi(.LCPI22_0) -; RV64I-NEXT: ld s1, %lo(.LCPI22_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: ld s2, %lo(.LCPI22_0)(a1) +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __adddf3 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: call __muldf3 ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __subdf3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll index fdeda0c273f6d..676f0f5ec3eb8 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/float-arith.ll @@ -414,9 +414,9 @@ define float @fmsub_s(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: lui a0, %hi(.LCPI12_0) -; RV32I-NEXT: lw a1, %lo(.LCPI12_0)(a0) ; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lui a1, %hi(.LCPI12_0) +; RV32I-NEXT: lw a1, %lo(.LCPI12_0)(a1) ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: lui a2, 524288 ; RV32I-NEXT: xor a2, a0, a2 @@ -437,9 +437,9 @@ define float @fmsub_s(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: lui a0, %hi(.LCPI12_0) -; RV64I-NEXT: lw a1, %lo(.LCPI12_0)(a0) ; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI12_0) +; RV64I-NEXT: lw a1, %lo(.LCPI12_0)(a1) ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: lui a2, 524288 ; RV64I-NEXT: xor a2, a0, a2 @@ -475,14 +475,14 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: lui a1, %hi(.LCPI13_0) -; RV32I-NEXT: lw s1, %lo(.LCPI13_0)(a1) -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: lw s2, %lo(.LCPI13_0)(a1) +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: lui a2, 524288 ; RV32I-NEXT: xor a1, s3, a2 @@ -507,14 +507,14 @@ define float @fnmadd_s(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; 
RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: lui a1, %hi(.LCPI13_0) -; RV64I-NEXT: lw s1, %lo(.LCPI13_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: lw s2, %lo(.LCPI13_0)(a1) +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: lui a2, 524288 ; RV64I-NEXT: xor a1, s3, a2 @@ -556,14 +556,14 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: lui a1, %hi(.LCPI14_0) -; RV32I-NEXT: lw s1, %lo(.LCPI14_0)(a1) -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: lw s2, %lo(.LCPI14_0)(a1) +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: mv s3, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: lui a2, 524288 ; RV32I-NEXT: xor a1, s3, a2 @@ -588,14 +588,14 @@ define float @fnmadd_s_2(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: lui a1, %hi(.LCPI14_0) -; RV64I-NEXT: lw s1, %lo(.LCPI14_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: lw s2, %lo(.LCPI14_0)(a1) +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: mv s3, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: lui a2, 524288 ; RV64I-NEXT: xor a1, s3, a2 @@ -720,9 +720,9 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: lui a1, %hi(.LCPI17_0) ; RV32I-NEXT: lw a1, %lo(.LCPI17_0)(a1) -; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a0, a0, a1 @@ -742,9 +742,9 @@ define float @fnmsub_s(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: lui a1, %hi(.LCPI17_0) ; RV64I-NEXT: lw a1, %lo(.LCPI17_0)(a1) -; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a0, a0, a1 @@ -778,9 +778,9 @@ define float @fnmsub_s_2(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: mv a0, a1 +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: lui a1, %hi(.LCPI18_0) ; RV32I-NEXT: lw a1, %lo(.LCPI18_0)(a1) -; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: lui a1, 524288 ; RV32I-NEXT: xor a1, a0, a1 @@ -801,9 +801,9 @@ define float @fnmsub_s_2(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv a0, a1 +; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: lui a1, %hi(.LCPI18_0) ; RV64I-NEXT: lw a1, %lo(.LCPI18_0)(a1) -; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: lui a1, 524288 ; RV64I-NEXT: xor a1, a0, a1 @@ -877,9 +877,9 @@ define float @fmsub_s_contract(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw 
s2, 0(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: lui a0, %hi(.LCPI20_0) -; RV32I-NEXT: lw a1, %lo(.LCPI20_0)(a0) ; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: lui a1, %hi(.LCPI20_0) +; RV32I-NEXT: lw a1, %lo(.LCPI20_0)(a1) ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: mv s2, a0 ; RV32I-NEXT: mv a0, s0 @@ -903,9 +903,9 @@ define float @fmsub_s_contract(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s2, 0(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: lui a0, %hi(.LCPI20_0) -; RV64I-NEXT: lw a1, %lo(.LCPI20_0)(a0) ; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: lui a1, %hi(.LCPI20_0) +; RV64I-NEXT: lw a1, %lo(.LCPI20_0)(a1) ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: mv s2, a0 ; RV64I-NEXT: mv a0, s0 @@ -946,18 +946,18 @@ define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: lui a1, %hi(.LCPI21_0) -; RV32I-NEXT: lw s1, %lo(.LCPI21_0)(a1) -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: lw s2, %lo(.LCPI21_0)(a1) +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: mv a0, s2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: mv a0, s3 @@ -984,18 +984,18 @@ define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: lui a1, %hi(.LCPI21_0) -; RV64I-NEXT: lw s1, %lo(.LCPI21_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: lw s2, %lo(.LCPI21_0)(a1) +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: mv a0, s2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: mv a0, s3 @@ -1039,20 +1039,20 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a2 ; RV32I-NEXT: lui a1, %hi(.LCPI22_0) -; RV32I-NEXT: lw s1, %lo(.LCPI22_0)(a1) -; RV32I-NEXT: mv s2, a2 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: lw s2, %lo(.LCPI22_0)(a1) +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: mv a1, s1 +; RV32I-NEXT: mv a1, s2 ; RV32I-NEXT: call __addsf3 ; RV32I-NEXT: mv a1, a0 ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: call __mulsf3 ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __subsf3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1071,20 +1071,20 @@ define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a2 ; RV64I-NEXT: lui a1, %hi(.LCPI22_0) -; RV64I-NEXT: lw s1, 
%lo(.LCPI22_0)(a1) -; RV64I-NEXT: mv s2, a2 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: lw s2, %lo(.LCPI22_0)(a1) +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: mv a1, s1 +; RV64I-NEXT: mv a1, s2 ; RV64I-NEXT: call __addsf3 ; RV64I-NEXT: mv a1, a0 ; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: call __mulsf3 ; RV64I-NEXT: mv a1, a0 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __subsf3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/freeze.ll b/llvm/test/CodeGen/RISCV/GlobalISel/freeze.ll index 234f338412066..36ff827ebf32a 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/freeze.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/freeze.ll @@ -142,20 +142,20 @@ define i32 @freeze_anonstruct(ptr %p) { define i32 @freeze_anonstruct2(ptr %p) { ; RV32-LABEL: freeze_anonstruct2: ; RV32: # %bb.0: -; RV32-NEXT: lh a1, 4(a0) -; RV32-NEXT: lw a0, 0(a0) -; RV32-NEXT: slli a1, a1, 16 -; RV32-NEXT: srli a1, a1, 16 -; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lh a0, 4(a0) +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srli a0, a0, 16 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: freeze_anonstruct2: ; RV64: # %bb.0: -; RV64-NEXT: lh a1, 4(a0) -; RV64-NEXT: lw a0, 0(a0) -; RV64-NEXT: slli a1, a1, 48 -; RV64-NEXT: srli a1, a1, 48 -; RV64-NEXT: addw a0, a0, a1 +; RV64-NEXT: lw a1, 0(a0) +; RV64-NEXT: lh a0, 4(a0) +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srli a0, a0, 48 +; RV64-NEXT: addw a0, a1, a0 ; RV64-NEXT: ret %s = load {i32, i16}, ptr %p %y1 = freeze {i32, i16} %s @@ -169,20 +169,20 @@ define i32 @freeze_anonstruct2(ptr %p) { define i32 @freeze_anonstruct2_sext(ptr %p) { ; RV32-LABEL: freeze_anonstruct2_sext: ; RV32: # %bb.0: -; RV32-NEXT: lh a1, 4(a0) -; RV32-NEXT: lw a0, 0(a0) -; RV32-NEXT: slli a1, a1, 16 -; RV32-NEXT: srai a1, a1, 16 -; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lh a0, 4(a0) +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srai a0, a0, 16 +; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: ret ; ; RV64-LABEL: freeze_anonstruct2_sext: ; RV64: # %bb.0: -; RV64-NEXT: lh a1, 4(a0) -; RV64-NEXT: lw a0, 0(a0) -; RV64-NEXT: slli a1, a1, 48 -; RV64-NEXT: srai a1, a1, 48 -; RV64-NEXT: addw a0, a0, a1 +; RV64-NEXT: lw a1, 0(a0) +; RV64-NEXT: lh a0, 4(a0) +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srai a0, a0, 48 +; RV64-NEXT: addw a0, a1, a0 ; RV64-NEXT: ret %s = load {i32, i16}, ptr %p %y1 = freeze {i32, i16} %s diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll index 8a786fc9993d2..6e13179bfe77e 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rotl-rotr.ll @@ -782,8 +782,8 @@ define i32 @rotr_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind { define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotl_64_mask: ; RV32I: # %bb.0: -; RV32I-NEXT: li a5, 32 ; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: li a5, 32 ; RV32I-NEXT: bltu a2, a5, .LBB10_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: li a3, 0 @@ -837,8 +837,8 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; ; RV32ZBB-LABEL: rotl_64_mask: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: li a5, 32 ; RV32ZBB-NEXT: neg a4, a2 +; RV32ZBB-NEXT: li a5, 32 ; RV32ZBB-NEXT: bltu a2, a5, .LBB10_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: li a3, 0 @@ -892,8 +892,8 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; ; 
RV32XTHEADBB-LABEL: rotl_64_mask: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: li a5, 32 ; RV32XTHEADBB-NEXT: neg a4, a2 +; RV32XTHEADBB-NEXT: li a5, 32 ; RV32XTHEADBB-NEXT: bltu a2, a5, .LBB10_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: li a3, 0 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll index 9a6c718703a27..e1019c63408ee 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbb.ll @@ -68,8 +68,8 @@ define signext i32 @log2_i32(i32 signext %a) nounwind { ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: sext.w a1, a0 ; RV64I-NEXT: li s0, 31 +; RV64I-NEXT: sext.w a1, a0 ; RV64I-NEXT: beqz a1, .LBB1_2 ; RV64I-NEXT: # %bb.1: # %cond.false ; RV64I-NEXT: srliw a1, a0, 1 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll index 558424b53be95..12afb3adf2f69 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zbkb.ll @@ -115,8 +115,8 @@ define i64 @pack_i64_3(ptr %0, ptr %1) { ; RV64I-LABEL: pack_i64_3: ; RV64I: # %bb.0: ; RV64I-NEXT: lwu a0, 0(a0) -; RV64I-NEXT: lwu a1, 0(a1) ; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: lwu a1, 0(a1) ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll b/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll index 8b262db56ccd2..8bffb0772eeef 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/shifts.ll @@ -503,9 +503,9 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: shl128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a2, 0(a2) ; RV32I-NEXT: lw a7, 0(a1) ; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a2, 0(a2) ; RV32I-NEXT: li a6, 64 ; RV32I-NEXT: li t1, 32 ; RV32I-NEXT: neg t5, a2 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/stacksave-stackrestore.ll b/llvm/test/CodeGen/RISCV/GlobalISel/stacksave-stackrestore.ll index caa749729ce19..11912483f8d9c 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/stacksave-stackrestore.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/stacksave-stackrestore.ll @@ -17,10 +17,10 @@ define void @test_scoped_alloca(i64 %n) { ; RV32-NEXT: .cfi_offset s1, -12 ; RV32-NEXT: addi s0, sp, 16 ; RV32-NEXT: .cfi_def_cfa s0, 0 +; RV32-NEXT: mv s1, sp ; RV32-NEXT: addi a0, a0, 15 ; RV32-NEXT: andi a0, a0, -16 ; RV32-NEXT: sub a0, sp, a0 -; RV32-NEXT: mv s1, sp ; RV32-NEXT: mv sp, a0 ; RV32-NEXT: call use_addr ; RV32-NEXT: mv sp, s1 @@ -48,10 +48,10 @@ define void @test_scoped_alloca(i64 %n) { ; RV64-NEXT: .cfi_offset s1, -24 ; RV64-NEXT: addi s0, sp, 32 ; RV64-NEXT: .cfi_def_cfa s0, 0 +; RV64-NEXT: mv s1, sp ; RV64-NEXT: addi a0, a0, 15 ; RV64-NEXT: andi a0, a0, -16 ; RV64-NEXT: sub a0, sp, a0 -; RV64-NEXT: mv s1, sp ; RV64-NEXT: mv sp, a0 ; RV64-NEXT: call use_addr ; RV64-NEXT: mv sp, s1 diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll index fc9be94988451..ba67b45ebbe7d 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll @@ -49,12 +49,12 @@ define i32 @va1(ptr %fmt, ...) 
{ ; RV32-NEXT: sw a2, 24(sp) ; RV32-NEXT: sw a3, 28(sp) ; RV32-NEXT: sw a4, 32(sp) -; RV32-NEXT: addi a0, sp, 20 -; RV32-NEXT: sw a0, 12(sp) -; RV32-NEXT: lw a0, 12(sp) ; RV32-NEXT: sw a5, 36(sp) ; RV32-NEXT: sw a6, 40(sp) ; RV32-NEXT: sw a7, 44(sp) +; RV32-NEXT: addi a0, sp, 20 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) ; RV32-NEXT: addi a1, a0, 4 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lw a0, 0(a0) @@ -103,12 +103,12 @@ define i32 @va1(ptr %fmt, ...) { ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) ; RV32-WITHFP-NEXT: sw a4, 16(s0) -; RV32-WITHFP-NEXT: addi a0, s0, 4 -; RV32-WITHFP-NEXT: sw a0, -12(s0) -; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) +; RV32-WITHFP-NEXT: addi a0, s0, 4 +; RV32-WITHFP-NEXT: sw a0, -12(s0) +; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: addi a1, a0, 4 ; RV32-WITHFP-NEXT: sw a1, -12(s0) ; RV32-WITHFP-NEXT: lw a0, 0(a0) @@ -517,12 +517,12 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; ILP32-NEXT: sw a2, 24(sp) ; ILP32-NEXT: sw a3, 28(sp) ; ILP32-NEXT: sw a4, 32(sp) -; ILP32-NEXT: addi a0, sp, 20 -; ILP32-NEXT: sw a0, 12(sp) -; ILP32-NEXT: lw a0, 12(sp) ; ILP32-NEXT: sw a5, 36(sp) ; ILP32-NEXT: sw a6, 40(sp) ; ILP32-NEXT: sw a7, 44(sp) +; ILP32-NEXT: addi a0, sp, 20 +; ILP32-NEXT: sw a0, 12(sp) +; ILP32-NEXT: lw a0, 12(sp) ; ILP32-NEXT: addi a1, a0, 7 ; ILP32-NEXT: addi a0, a0, 15 ; ILP32-NEXT: andi a1, a1, -8 @@ -635,12 +635,12 @@ define i64 @va2(ptr %fmt, ...) nounwind { ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) ; RV32-WITHFP-NEXT: sw a4, 16(s0) -; RV32-WITHFP-NEXT: addi a0, s0, 4 -; RV32-WITHFP-NEXT: sw a0, -12(s0) -; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) +; RV32-WITHFP-NEXT: addi a0, s0, 4 +; RV32-WITHFP-NEXT: sw a0, -12(s0) +; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: addi a1, a0, 7 ; RV32-WITHFP-NEXT: addi a0, a0, 15 ; RV32-WITHFP-NEXT: andi a1, a1, -8 @@ -854,14 +854,14 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; ILP32-LABEL: va3: ; ILP32: # %bb.0: ; ILP32-NEXT: addi sp, sp, -32 -; ILP32-NEXT: addi a0, sp, 12 -; ILP32-NEXT: sw a0, 4(sp) -; ILP32-NEXT: lw a0, 4(sp) ; ILP32-NEXT: sw a3, 12(sp) ; ILP32-NEXT: sw a4, 16(sp) ; ILP32-NEXT: sw a5, 20(sp) ; ILP32-NEXT: sw a6, 24(sp) ; ILP32-NEXT: sw a7, 28(sp) +; ILP32-NEXT: addi a0, sp, 12 +; ILP32-NEXT: sw a0, 4(sp) +; ILP32-NEXT: lw a0, 4(sp) ; ILP32-NEXT: addi a3, a0, 7 ; ILP32-NEXT: addi a0, a0, 15 ; ILP32-NEXT: andi a3, a3, -8 @@ -956,13 +956,13 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV64-LABEL: va3: ; RV64: # %bb.0: ; RV64-NEXT: addi sp, sp, -64 -; RV64-NEXT: addi a0, sp, 16 -; RV64-NEXT: sd a0, 8(sp) -; RV64-NEXT: ld a0, 8(sp) ; RV64-NEXT: sd a2, 16(sp) ; RV64-NEXT: sd a3, 24(sp) ; RV64-NEXT: sd a4, 32(sp) ; RV64-NEXT: sd a5, 40(sp) +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) ; RV64-NEXT: sd a6, 48(sp) ; RV64-NEXT: sd a7, 56(sp) ; RV64-NEXT: addi a2, a0, 7 @@ -980,14 +980,14 @@ define i64 @va3(i32 %a, i64 %b, ...) 
nounwind { ; RV32-WITHFP-NEXT: sw ra, 20(sp) # 4-byte Folded Spill ; RV32-WITHFP-NEXT: sw s0, 16(sp) # 4-byte Folded Spill ; RV32-WITHFP-NEXT: addi s0, sp, 24 -; RV32-WITHFP-NEXT: addi a0, s0, 4 -; RV32-WITHFP-NEXT: sw a0, -12(s0) -; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: sw a3, 4(s0) ; RV32-WITHFP-NEXT: sw a4, 8(s0) ; RV32-WITHFP-NEXT: sw a5, 12(s0) ; RV32-WITHFP-NEXT: sw a6, 16(s0) ; RV32-WITHFP-NEXT: sw a7, 20(s0) +; RV32-WITHFP-NEXT: addi a0, s0, 4 +; RV32-WITHFP-NEXT: sw a0, -12(s0) +; RV32-WITHFP-NEXT: lw a0, -12(s0) ; RV32-WITHFP-NEXT: addi a3, a0, 7 ; RV32-WITHFP-NEXT: addi a0, a0, 15 ; RV32-WITHFP-NEXT: andi a3, a3, -8 @@ -1009,13 +1009,13 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV64-WITHFP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64-WITHFP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64-WITHFP-NEXT: addi s0, sp, 32 -; RV64-WITHFP-NEXT: mv a0, s0 -; RV64-WITHFP-NEXT: sd a0, -24(s0) -; RV64-WITHFP-NEXT: ld a0, -24(s0) ; RV64-WITHFP-NEXT: sd a2, 0(s0) ; RV64-WITHFP-NEXT: sd a3, 8(s0) ; RV64-WITHFP-NEXT: sd a4, 16(s0) ; RV64-WITHFP-NEXT: sd a5, 24(s0) +; RV64-WITHFP-NEXT: mv a0, s0 +; RV64-WITHFP-NEXT: sd a0, -24(s0) +; RV64-WITHFP-NEXT: ld a0, -24(s0) ; RV64-WITHFP-NEXT: sd a6, 32(s0) ; RV64-WITHFP-NEXT: sd a7, 40(s0) ; RV64-WITHFP-NEXT: addi a2, a0, 7 @@ -1233,14 +1233,14 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV32-NEXT: addi a0, sp, 36 ; RV32-NEXT: sw a0, 16(sp) ; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: addi a0, a0, 3 ; RV32-NEXT: li s0, -4 +; RV32-NEXT: addi a0, a0, 3 ; RV32-NEXT: and a0, a0, s0 ; RV32-NEXT: addi a1, a0, 4 ; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: lw a1, 16(sp) ; RV32-NEXT: lw s1, 0(a0) -; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: lw a0, 12(sp) ; RV32-NEXT: call notdead ; RV32-NEXT: lw a0, 16(sp) @@ -1254,8 +1254,8 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV32-NEXT: and a1, a1, s0 ; RV32-NEXT: addi a2, a1, 4 ; RV32-NEXT: sw a2, 16(sp) -; RV32-NEXT: lw a2, 16(sp) ; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lw a2, 16(sp) ; RV32-NEXT: addi a2, a2, 3 ; RV32-NEXT: andi a2, a2, -4 ; RV32-NEXT: addi a3, a2, 4 @@ -1286,18 +1286,18 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV64-NEXT: addi a0, sp, 56 ; RV64-NEXT: sd a0, 16(sp) ; RV64-NEXT: ld a0, 16(sp) -; RV64-NEXT: addi a0, a0, 7 ; RV64-NEXT: li s0, -8 +; RV64-NEXT: addi a0, a0, 7 ; RV64-NEXT: and a0, a0, s0 ; RV64-NEXT: addi a1, a0, 8 ; RV64-NEXT: sd a1, 16(sp) ; RV64-NEXT: ld a1, 16(sp) ; RV64-NEXT: ld s1, 0(a0) ; RV64-NEXT: sd a1, 8(sp) -; RV64-NEXT: lw a0, 12(sp) -; RV64-NEXT: lwu a1, 8(sp) -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: lwu a0, 8(sp) +; RV64-NEXT: lw a1, 12(sp) +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: call notdead ; RV64-NEXT: ld a0, 16(sp) ; RV64-NEXT: addi a0, a0, 7 @@ -1310,8 +1310,8 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV64-NEXT: and a1, a1, s0 ; RV64-NEXT: addi a2, a1, 8 ; RV64-NEXT: sd a2, 16(sp) -; RV64-NEXT: ld a2, 16(sp) ; RV64-NEXT: ld a1, 0(a1) +; RV64-NEXT: ld a2, 16(sp) ; RV64-NEXT: addi a2, a2, 7 ; RV64-NEXT: andi a2, a2, -8 ; RV64-NEXT: addi a3, a2, 8 @@ -1344,14 +1344,14 @@ define iXLen @va4_va_copy(i32 %argno, ...) 
nounwind { ; RV32-WITHFP-NEXT: addi a0, s0, 4 ; RV32-WITHFP-NEXT: sw a0, -20(s0) ; RV32-WITHFP-NEXT: lw a0, -20(s0) -; RV32-WITHFP-NEXT: addi a0, a0, 3 ; RV32-WITHFP-NEXT: li s1, -4 +; RV32-WITHFP-NEXT: addi a0, a0, 3 ; RV32-WITHFP-NEXT: and a0, a0, s1 ; RV32-WITHFP-NEXT: addi a1, a0, 4 ; RV32-WITHFP-NEXT: sw a1, -20(s0) -; RV32-WITHFP-NEXT: lw a1, -20(s0) ; RV32-WITHFP-NEXT: lw s2, 0(a0) -; RV32-WITHFP-NEXT: sw a1, -24(s0) +; RV32-WITHFP-NEXT: lw a0, -20(s0) +; RV32-WITHFP-NEXT: sw a0, -24(s0) ; RV32-WITHFP-NEXT: lw a0, -24(s0) ; RV32-WITHFP-NEXT: call notdead ; RV32-WITHFP-NEXT: lw a0, -20(s0) @@ -1365,8 +1365,8 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV32-WITHFP-NEXT: and a1, a1, s1 ; RV32-WITHFP-NEXT: addi a2, a1, 4 ; RV32-WITHFP-NEXT: sw a2, -20(s0) -; RV32-WITHFP-NEXT: lw a2, -20(s0) ; RV32-WITHFP-NEXT: lw a1, 0(a1) +; RV32-WITHFP-NEXT: lw a2, -20(s0) ; RV32-WITHFP-NEXT: addi a2, a2, 3 ; RV32-WITHFP-NEXT: andi a2, a2, -4 ; RV32-WITHFP-NEXT: addi a3, a2, 4 @@ -1400,18 +1400,18 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV64-WITHFP-NEXT: addi a0, s0, 8 ; RV64-WITHFP-NEXT: sd a0, -40(s0) ; RV64-WITHFP-NEXT: ld a0, -40(s0) -; RV64-WITHFP-NEXT: addi a0, a0, 7 ; RV64-WITHFP-NEXT: li s1, -8 +; RV64-WITHFP-NEXT: addi a0, a0, 7 ; RV64-WITHFP-NEXT: and a0, a0, s1 ; RV64-WITHFP-NEXT: addi a1, a0, 8 ; RV64-WITHFP-NEXT: sd a1, -40(s0) ; RV64-WITHFP-NEXT: ld a1, -40(s0) ; RV64-WITHFP-NEXT: ld s2, 0(a0) ; RV64-WITHFP-NEXT: sd a1, -48(s0) -; RV64-WITHFP-NEXT: lw a0, -44(s0) -; RV64-WITHFP-NEXT: lwu a1, -48(s0) -; RV64-WITHFP-NEXT: slli a0, a0, 32 -; RV64-WITHFP-NEXT: or a0, a0, a1 +; RV64-WITHFP-NEXT: lwu a0, -48(s0) +; RV64-WITHFP-NEXT: lw a1, -44(s0) +; RV64-WITHFP-NEXT: slli a1, a1, 32 +; RV64-WITHFP-NEXT: or a0, a1, a0 ; RV64-WITHFP-NEXT: call notdead ; RV64-WITHFP-NEXT: ld a0, -40(s0) ; RV64-WITHFP-NEXT: addi a0, a0, 7 @@ -1424,8 +1424,8 @@ define iXLen @va4_va_copy(i32 %argno, ...) nounwind { ; RV64-WITHFP-NEXT: and a1, a1, s1 ; RV64-WITHFP-NEXT: addi a2, a1, 8 ; RV64-WITHFP-NEXT: sd a2, -40(s0) -; RV64-WITHFP-NEXT: ld a2, -40(s0) ; RV64-WITHFP-NEXT: ld a1, 0(a1) +; RV64-WITHFP-NEXT: ld a2, -40(s0) ; RV64-WITHFP-NEXT: addi a2, a2, 7 ; RV64-WITHFP-NEXT: andi a2, a2, -8 ; RV64-WITHFP-NEXT: addi a3, a2, 8 @@ -1593,19 +1593,19 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: sw a4, 288(a0) ; RV32-NEXT: lui a0, 24414 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: sw a5, 292(a0) +; RV32-NEXT: lui a0, 24414 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: sw a6, 296(a0) +; RV32-NEXT: lui a0, 24414 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: sw a7, 300(a0) +; RV32-NEXT: lui a0, 24414 ; RV32-NEXT: addi a0, a0, 276 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: sw a0, 12(sp) ; RV32-NEXT: lw a0, 12(sp) -; RV32-NEXT: lui a1, 24414 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: sw a5, 292(a1) -; RV32-NEXT: lui a1, 24414 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: sw a6, 296(a1) -; RV32-NEXT: lui a1, 24414 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: sw a7, 300(a1) ; RV32-NEXT: addi a1, a0, 4 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lw a0, 0(a0) @@ -1682,12 +1682,12 @@ define i32 @va_large_stack(ptr %fmt, ...) 
{ ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) ; RV32-WITHFP-NEXT: sw a4, 16(s0) -; RV32-WITHFP-NEXT: addi a1, s0, 4 -; RV32-WITHFP-NEXT: sw a1, 0(a0) -; RV32-WITHFP-NEXT: lw a1, 0(a0) ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) +; RV32-WITHFP-NEXT: addi a1, s0, 4 +; RV32-WITHFP-NEXT: sw a1, 0(a0) +; RV32-WITHFP-NEXT: lw a1, 0(a0) ; RV32-WITHFP-NEXT: addi a2, a1, 4 ; RV32-WITHFP-NEXT: sw a2, 0(a0) ; RV32-WITHFP-NEXT: lw a0, 0(a1) @@ -1869,12 +1869,12 @@ define i32 @va_printf(ptr %fmt, ...) { ; RV32-NEXT: sw a2, 24(sp) ; RV32-NEXT: sw a3, 28(sp) ; RV32-NEXT: sw a4, 32(sp) -; RV32-NEXT: addi a1, sp, 20 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: lw a1, 8(sp) ; RV32-NEXT: sw a5, 36(sp) ; RV32-NEXT: sw a6, 40(sp) ; RV32-NEXT: sw a7, 44(sp) +; RV32-NEXT: addi a1, sp, 20 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: lw a1, 8(sp) ; RV32-NEXT: call va_vprintf ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra @@ -1892,12 +1892,12 @@ define i32 @va_printf(ptr %fmt, ...) { ; RV64-NEXT: sd a2, 32(sp) ; RV64-NEXT: sd a3, 40(sp) ; RV64-NEXT: sd a4, 48(sp) -; RV64-NEXT: addi a1, sp, 24 -; RV64-NEXT: sd a1, 0(sp) -; RV64-NEXT: ld a1, 0(sp) ; RV64-NEXT: sd a5, 56(sp) ; RV64-NEXT: sd a6, 64(sp) ; RV64-NEXT: sd a7, 72(sp) +; RV64-NEXT: addi a1, sp, 24 +; RV64-NEXT: sd a1, 0(sp) +; RV64-NEXT: ld a1, 0(sp) ; RV64-NEXT: call va_vprintf ; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64-NEXT: .cfi_restore ra @@ -1919,12 +1919,12 @@ define i32 @va_printf(ptr %fmt, ...) { ; RV32-WITHFP-NEXT: sw a2, 8(s0) ; RV32-WITHFP-NEXT: sw a3, 12(s0) ; RV32-WITHFP-NEXT: sw a4, 16(s0) -; RV32-WITHFP-NEXT: addi a1, s0, 4 -; RV32-WITHFP-NEXT: sw a1, -12(s0) -; RV32-WITHFP-NEXT: lw a1, -12(s0) ; RV32-WITHFP-NEXT: sw a5, 20(s0) ; RV32-WITHFP-NEXT: sw a6, 24(s0) ; RV32-WITHFP-NEXT: sw a7, 28(s0) +; RV32-WITHFP-NEXT: addi a1, s0, 4 +; RV32-WITHFP-NEXT: sw a1, -12(s0) +; RV32-WITHFP-NEXT: lw a1, -12(s0) ; RV32-WITHFP-NEXT: call va_vprintf ; RV32-WITHFP-NEXT: .cfi_def_cfa sp, 48 ; RV32-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -1949,12 +1949,12 @@ define i32 @va_printf(ptr %fmt, ...) 
{ ; RV64-WITHFP-NEXT: sd a2, 16(s0) ; RV64-WITHFP-NEXT: sd a3, 24(s0) ; RV64-WITHFP-NEXT: sd a4, 32(s0) -; RV64-WITHFP-NEXT: addi a1, s0, 8 -; RV64-WITHFP-NEXT: sd a1, -24(s0) -; RV64-WITHFP-NEXT: ld a1, -24(s0) ; RV64-WITHFP-NEXT: sd a5, 40(s0) ; RV64-WITHFP-NEXT: sd a6, 48(s0) ; RV64-WITHFP-NEXT: sd a7, 56(s0) +; RV64-WITHFP-NEXT: addi a1, s0, 8 +; RV64-WITHFP-NEXT: sd a1, -24(s0) +; RV64-WITHFP-NEXT: ld a1, -24(s0) ; RV64-WITHFP-NEXT: call va_vprintf ; RV64-WITHFP-NEXT: .cfi_def_cfa sp, 96 ; RV64-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll index bc002fee4417c..47c17d615e0f2 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -5,22 +5,22 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: lshr_4bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) ; RV64I-NEXT: lbu a0, 3(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: lbu a4, 0(a1) -; RV64I-NEXT: lbu a6, 1(a1) -; RV64I-NEXT: lbu a7, 2(a1) -; RV64I-NEXT: lbu a1, 3(a1) +; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a4, a6, a4 +; RV64I-NEXT: lbu a4, 0(a1) +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: lbu a6, 2(a1) +; RV64I-NEXT: lbu a1, 3(a1) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a0, a0, 16 ; RV64I-NEXT: slli a1, a1, 16 ; RV64I-NEXT: or a0, a0, a3 @@ -40,22 +40,22 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: lshr_4bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 1(a0) -; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) ; RV32I-NEXT: lbu a5, 2(a0) ; RV32I-NEXT: lbu a0, 3(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: lbu a7, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a3, a4, a3 ; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a4, a6, a4 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: or a0, a0, a3 @@ -82,22 +82,22 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: shl_4bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) ; RV64I-NEXT: lbu a0, 3(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: lbu a4, 0(a1) -; RV64I-NEXT: lbu a6, 1(a1) -; RV64I-NEXT: lbu a7, 2(a1) -; RV64I-NEXT: 
lbu a1, 3(a1) +; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a4, a6, a4 +; RV64I-NEXT: lbu a4, 0(a1) +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: lbu a6, 2(a1) +; RV64I-NEXT: lbu a1, 3(a1) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a0, a0, 16 ; RV64I-NEXT: slli a1, a1, 16 ; RV64I-NEXT: or a0, a0, a3 @@ -117,22 +117,22 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: shl_4bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 1(a0) -; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) ; RV32I-NEXT: lbu a5, 2(a0) ; RV32I-NEXT: lbu a0, 3(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: lbu a7, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a3, a4, a3 ; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a4, a6, a4 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: or a0, a0, a3 @@ -159,22 +159,22 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_4bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) ; RV64I-NEXT: lbu a0, 3(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 -; RV64I-NEXT: lbu a4, 0(a1) -; RV64I-NEXT: lbu a6, 1(a1) -; RV64I-NEXT: lbu a7, 2(a1) -; RV64I-NEXT: lbu a1, 3(a1) +; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a0, a0, 8 +; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a4, a6, a4 +; RV64I-NEXT: lbu a4, 0(a1) +; RV64I-NEXT: lbu a5, 1(a1) +; RV64I-NEXT: lbu a6, 2(a1) +; RV64I-NEXT: lbu a1, 3(a1) +; RV64I-NEXT: slli a5, a5, 8 +; RV64I-NEXT: or a4, a5, a4 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a1, a1, a6 ; RV64I-NEXT: slli a0, a0, 16 ; RV64I-NEXT: slli a1, a1, 16 ; RV64I-NEXT: or a0, a0, a3 @@ -194,22 +194,22 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: ashr_4bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 1(a0) -; RV32I-NEXT: lbu a4, 0(a0) +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) ; RV32I-NEXT: lbu a5, 2(a0) ; RV32I-NEXT: lbu a0, 3(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: lbu a4, 0(a1) -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: lbu a7, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a3, a4, a3 ; RV32I-NEXT: or a0, a0, a5 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a4, a6, a4 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or a1, a1, a7 
+; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: or a0, a0, a3 @@ -247,38 +247,38 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 0(a1) -; RV64I-NEXT: lbu a6, 1(a1) -; RV64I-NEXT: lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: lbu a6, 0(a1) +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t2, 3(a1) ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 4(a1) -; RV64I-NEXT: lbu t0, 5(a1) -; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a7, t2, t0 +; RV64I-NEXT: lbu t0, 4(a1) +; RV64I-NEXT: lbu t1, 5(a1) +; RV64I-NEXT: lbu t2, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t3, t3, 8 -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or a6, t0, a6 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, t1 +; RV64I-NEXT: or a1, a1, t2 ; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lui a4, 16 ; RV64I-NEXT: addi a4, a4, -1 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a0, a0, a7 -; RV64I-NEXT: or a5, t2, a5 -; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a0, a0, a3 @@ -310,54 +310,54 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu a4, 1(a0) ; RV32I-NEXT: lbu a5, 2(a0) ; RV32I-NEXT: lbu a6, 3(a0) -; RV32I-NEXT: lbu a7, 4(a0) -; RV32I-NEXT: lbu t0, 5(a0) -; RV32I-NEXT: lbu t1, 6(a0) -; RV32I-NEXT: lbu a0, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: lbu a6, 0(a1) +; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a7, 5(a0) +; RV32I-NEXT: lbu t0, 6(a0) +; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: or a6, a0, t0 +; RV32I-NEXT: lbu a0, 0(a1) ; RV32I-NEXT: lbu a7, 1(a1) ; RV32I-NEXT: lbu t0, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or t1, a0, t1 ; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: or a7, a7, a0 ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: slli a0, a4, 16 +; RV32I-NEXT: slli a0, a5, 16 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: slli a3, t1, 16 -; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: slli a1, a1, 3 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a3, a1, 16 +; RV32I-NEXT: or a1, a6, a4 +; RV32I-NEXT: or a3, a3, a7 +; RV32I-NEXT: slli a3, a3, 3 ; RV32I-NEXT: li a4, 32 -; RV32I-NEXT: or a3, a3, a5 -; RV32I-NEXT: bltu a1, a4, .LBB3_2 +; RV32I-NEXT: bltu a3, a4, .LBB3_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srl a5, a3, 
a1 -; RV32I-NEXT: bnez a1, .LBB3_3 +; RV32I-NEXT: srl a5, a1, a3 +; RV32I-NEXT: bnez a3, .LBB3_3 ; RV32I-NEXT: j .LBB3_4 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: srl a5, a0, a1 -; RV32I-NEXT: neg a6, a1 -; RV32I-NEXT: sll a6, a3, a6 +; RV32I-NEXT: srl a5, a0, a3 +; RV32I-NEXT: neg a6, a3 +; RV32I-NEXT: sll a6, a1, a6 ; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: beqz a1, .LBB3_4 +; RV32I-NEXT: beqz a3, .LBB3_4 ; RV32I-NEXT: .LBB3_3: ; RV32I-NEXT: mv a0, a5 ; RV32I-NEXT: .LBB3_4: -; RV32I-NEXT: bltu a1, a4, .LBB3_6 +; RV32I-NEXT: bltu a3, a4, .LBB3_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: j .LBB3_7 ; RV32I-NEXT: .LBB3_6: -; RV32I-NEXT: srl a1, a3, a1 +; RV32I-NEXT: srl a1, a1, a3 ; RV32I-NEXT: .LBB3_7: ; RV32I-NEXT: srli a3, a0, 16 ; RV32I-NEXT: lui a4, 16 @@ -398,38 +398,38 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 0(a1) -; RV64I-NEXT: lbu a6, 1(a1) -; RV64I-NEXT: lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: lbu a6, 0(a1) +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t2, 3(a1) ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 4(a1) -; RV64I-NEXT: lbu t0, 5(a1) -; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a7, t2, t0 +; RV64I-NEXT: lbu t0, 4(a1) +; RV64I-NEXT: lbu t1, 5(a1) +; RV64I-NEXT: lbu t2, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t3, t3, 8 -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or a6, t0, a6 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, t1 +; RV64I-NEXT: or a1, a1, t2 ; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lui a4, 16 ; RV64I-NEXT: addi a4, a4, -1 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a0, a0, a7 -; RV64I-NEXT: or a5, t2, a5 -; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a0, a0, a3 @@ -461,34 +461,34 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu a4, 1(a0) ; RV32I-NEXT: lbu a5, 2(a0) ; RV32I-NEXT: lbu a6, 3(a0) -; RV32I-NEXT: lbu a7, 4(a0) -; RV32I-NEXT: lbu t0, 5(a0) -; RV32I-NEXT: lbu t1, 6(a0) -; RV32I-NEXT: lbu a0, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: lbu a6, 0(a1) +; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a7, 5(a0) +; RV32I-NEXT: lbu t0, 6(a0) +; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: or a6, a7, a4 +; RV32I-NEXT: or a0, a0, t0 +; RV32I-NEXT: lbu a4, 0(a1) ; RV32I-NEXT: lbu a7, 1(a1) ; RV32I-NEXT: lbu t0, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or a0, a0, t1 ; RV32I-NEXT: 
slli a7, a7, 8 -; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: or a7, a7, a4 ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: slli a4, a5, 16 ; RV32I-NEXT: or a4, a4, a3 ; RV32I-NEXT: slli a0, a0, 16 ; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or a3, a1, a6 +; RV32I-NEXT: or a0, a0, a6 +; RV32I-NEXT: or a3, a1, a7 ; RV32I-NEXT: slli a3, a3, 3 ; RV32I-NEXT: li a1, 32 -; RV32I-NEXT: or a0, a0, a5 ; RV32I-NEXT: bltu a3, a1, .LBB4_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: li a1, 0 @@ -544,38 +544,38 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a0, 7(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 0(a1) -; RV64I-NEXT: lbu a6, 1(a1) -; RV64I-NEXT: lbu t2, 2(a1) -; RV64I-NEXT: lbu t3, 3(a1) -; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: lbu a6, 0(a1) +; RV64I-NEXT: lbu a7, 1(a1) +; RV64I-NEXT: lbu t0, 2(a1) +; RV64I-NEXT: lbu t2, 3(a1) ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: or a0, a0, t1 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: lbu a6, 4(a1) -; RV64I-NEXT: lbu t0, 5(a1) -; RV64I-NEXT: lbu t1, 6(a1) +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a7, t2, t0 +; RV64I-NEXT: lbu t0, 4(a1) +; RV64I-NEXT: lbu t1, 5(a1) +; RV64I-NEXT: lbu t2, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli t3, t3, 8 -; RV64I-NEXT: or t2, t3, t2 -; RV64I-NEXT: slli t0, t0, 8 -; RV64I-NEXT: or a6, t0, a6 +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, t1 +; RV64I-NEXT: or a1, a1, t2 ; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lui a4, 16 ; RV64I-NEXT: addi a4, a4, -1 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: slli a7, a7, 16 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a0, a0, a7 -; RV64I-NEXT: or a5, t2, a5 -; RV64I-NEXT: or a1, a1, a6 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a0, a0, a3 @@ -607,54 +607,54 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu a4, 1(a0) ; RV32I-NEXT: lbu a5, 2(a0) ; RV32I-NEXT: lbu a6, 3(a0) -; RV32I-NEXT: lbu a7, 4(a0) -; RV32I-NEXT: lbu t0, 5(a0) -; RV32I-NEXT: lbu t1, 6(a0) -; RV32I-NEXT: lbu a0, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 ; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: lbu a6, 0(a1) +; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a7, 5(a0) +; RV32I-NEXT: lbu t0, 6(a0) +; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: or a6, a0, t0 +; RV32I-NEXT: lbu a0, 0(a1) ; RV32I-NEXT: lbu a7, 1(a1) ; RV32I-NEXT: lbu t0, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or t1, a0, t1 ; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: or a7, a7, a0 ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a1, a1, t0 -; RV32I-NEXT: slli a0, a4, 16 +; RV32I-NEXT: slli a0, a5, 16 ; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: slli a3, t1, 16 
-; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or a1, a1, a6 -; RV32I-NEXT: slli a1, a1, 3 +; RV32I-NEXT: slli a6, a6, 16 +; RV32I-NEXT: slli a3, a1, 16 +; RV32I-NEXT: or a1, a6, a4 +; RV32I-NEXT: or a3, a3, a7 +; RV32I-NEXT: slli a3, a3, 3 ; RV32I-NEXT: li a4, 32 -; RV32I-NEXT: or a3, a3, a5 -; RV32I-NEXT: bltu a1, a4, .LBB5_2 +; RV32I-NEXT: bltu a3, a4, .LBB5_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sra a5, a3, a1 -; RV32I-NEXT: bnez a1, .LBB5_3 +; RV32I-NEXT: sra a5, a1, a3 +; RV32I-NEXT: bnez a3, .LBB5_3 ; RV32I-NEXT: j .LBB5_4 ; RV32I-NEXT: .LBB5_2: -; RV32I-NEXT: srl a5, a0, a1 -; RV32I-NEXT: neg a6, a1 -; RV32I-NEXT: sll a6, a3, a6 +; RV32I-NEXT: srl a5, a0, a3 +; RV32I-NEXT: neg a6, a3 +; RV32I-NEXT: sll a6, a1, a6 ; RV32I-NEXT: or a5, a5, a6 -; RV32I-NEXT: beqz a1, .LBB5_4 +; RV32I-NEXT: beqz a3, .LBB5_4 ; RV32I-NEXT: .LBB5_3: ; RV32I-NEXT: mv a0, a5 ; RV32I-NEXT: .LBB5_4: -; RV32I-NEXT: bltu a1, a4, .LBB5_6 +; RV32I-NEXT: bltu a3, a4, .LBB5_6 ; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: srai a1, a3, 31 +; RV32I-NEXT: srai a1, a1, 31 ; RV32I-NEXT: j .LBB5_7 ; RV32I-NEXT: .LBB5_6: -; RV32I-NEXT: sra a1, a3, a1 +; RV32I-NEXT: sra a1, a1, a3 ; RV32I-NEXT: .LBB5_7: ; RV32I-NEXT: srli a3, a0, 16 ; RV32I-NEXT: lui a4, 16 @@ -686,8 +686,6 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: lshr_16bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lbu a3, 0(a0) ; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) @@ -702,81 +700,81 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu t6, 11(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 13(a0) -; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: lbu t0, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or t1, t4, t3 -; RV64I-NEXT: or t2, t6, t5 -; RV64I-NEXT: lbu t3, 0(a1) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: or t2, t4, t3 +; RV64I-NEXT: or t3, t6, t5 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 0(a1) ; RV64I-NEXT: lbu t4, 1(a1) ; RV64I-NEXT: lbu t5, 2(a1) ; RV64I-NEXT: lbu t6, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a0, a0, s0 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t3, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu s0, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a7, t4, a7 +; RV64I-NEXT: or t0, t6, t5 +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: lbu t5, 5(a1) +; RV64I-NEXT: lbu t6, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t4, t5, t4 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, s0 +; RV64I-NEXT: or a1, a1, t6 ; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: slli 
t2, t2, 16 -; RV64I-NEXT: or a7, t2, t1 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: or a4, t1, a5 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or a5, t3, t2 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli t5, t5, 16 -; RV64I-NEXT: or a5, t5, a6 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a6, t0, a7 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a1, a1, t3 +; RV64I-NEXT: or a1, a1, t4 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: slli a6, a0, 32 -; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a7, a0, 32 +; RV64I-NEXT: slli t0, a1, 32 ; RV64I-NEXT: or a0, a4, a3 -; RV64I-NEXT: or a1, a1, a5 -; RV64I-NEXT: slli a1, a1, 3 +; RV64I-NEXT: or a1, a7, a5 +; RV64I-NEXT: or a3, t0, a6 +; RV64I-NEXT: slli a3, a3, 3 ; RV64I-NEXT: li a4, 64 -; RV64I-NEXT: or a3, a6, a7 -; RV64I-NEXT: bltu a1, a4, .LBB6_2 +; RV64I-NEXT: bltu a3, a4, .LBB6_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: subw a5, a1, a4 -; RV64I-NEXT: srl a5, a3, a5 -; RV64I-NEXT: bnez a1, .LBB6_3 +; RV64I-NEXT: subw a5, a3, a4 +; RV64I-NEXT: srl a5, a1, a5 +; RV64I-NEXT: bnez a3, .LBB6_3 ; RV64I-NEXT: j .LBB6_4 ; RV64I-NEXT: .LBB6_2: -; RV64I-NEXT: srl a5, a0, a1 -; RV64I-NEXT: negw a6, a1 -; RV64I-NEXT: sll a6, a3, a6 +; RV64I-NEXT: srl a5, a0, a3 +; RV64I-NEXT: negw a6, a3 +; RV64I-NEXT: sll a6, a1, a6 ; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: beqz a1, .LBB6_4 +; RV64I-NEXT: beqz a3, .LBB6_4 ; RV64I-NEXT: .LBB6_3: ; RV64I-NEXT: mv a0, a5 ; RV64I-NEXT: .LBB6_4: -; RV64I-NEXT: bltu a1, a4, .LBB6_6 +; RV64I-NEXT: bltu a3, a4, .LBB6_6 ; RV64I-NEXT: # %bb.5: ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: j .LBB6_7 ; RV64I-NEXT: .LBB6_6: -; RV64I-NEXT: srl a1, a3, a1 +; RV64I-NEXT: srl a1, a1, a3 ; RV64I-NEXT: .LBB6_7: ; RV64I-NEXT: srli a3, a0, 32 ; RV64I-NEXT: srliw a4, a0, 16 @@ -814,8 +812,6 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb a3, 13(a2) ; RV64I-NEXT: sb t4, 14(a2) ; RV64I-NEXT: sb t5, 15(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_16bytes: @@ -833,42 +829,42 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu t1, 6(a0) ; RV32I-NEXT: lbu t2, 7(a0) ; RV32I-NEXT: lbu t4, 8(a0) -; RV32I-NEXT: lbu t5, 9(a0) -; RV32I-NEXT: lbu t6, 10(a0) -; RV32I-NEXT: lbu s0, 11(a0) +; RV32I-NEXT: lbu t3, 9(a0) +; RV32I-NEXT: lbu t5, 10(a0) +; RV32I-NEXT: lbu t6, 11(a0) ; RV32I-NEXT: slli a7, a7, 8 ; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: slli s0, t3, 8 ; RV32I-NEXT: or t3, a7, a6 ; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu a6, 12(a0) -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu t2, 14(a0) +; RV32I-NEXT: or a6, s0, t4 +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t2, 13(a0) +; RV32I-NEXT: lbu t4, 14(a0) ; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: or t5, s0, t6 -; RV32I-NEXT: or t6, a7, a6 -; RV32I-NEXT: lbu a6, 0(a1) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or t5, t6, t5 +; RV32I-NEXT: or t2, t2, a7 +; RV32I-NEXT: or t4, a0, t4 +; RV32I-NEXT: lbu a0, 0(a1) ; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: lbu s0, 2(a1) +; RV32I-NEXT: lbu t6, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or t2, a0, t2 ; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or s1, a7, a6 +; RV32I-NEXT: or s0, a7, a0 ; 
RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or s0, a1, s0 +; RV32I-NEXT: or t6, a1, t6 ; RV32I-NEXT: li a7, 32 ; RV32I-NEXT: slli a1, a5, 8 ; RV32I-NEXT: slli a0, t0, 8 ; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli t2, t2, 16 -; RV32I-NEXT: slli s0, s0, 16 -; RV32I-NEXT: or a6, t5, t4 -; RV32I-NEXT: or t0, t2, t6 -; RV32I-NEXT: or a5, s0, s1 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: or a6, t5, a6 +; RV32I-NEXT: or t0, t4, t2 +; RV32I-NEXT: or a5, t6, s0 ; RV32I-NEXT: slli a5, a5, 3 ; RV32I-NEXT: srl t2, a6, a5 ; RV32I-NEXT: neg t5, a5 @@ -1019,8 +1015,6 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: lshr_16bytes_wordOff: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lbu a3, 0(a0) ; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) @@ -1035,81 +1029,81 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: lbu t6, 11(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 13(a0) -; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: lbu t0, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or t1, t4, t3 -; RV64I-NEXT: or t2, t6, t5 -; RV64I-NEXT: lbu t3, 0(a1) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: or t2, t4, t3 +; RV64I-NEXT: or t3, t6, t5 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 0(a1) ; RV64I-NEXT: lbu t4, 1(a1) ; RV64I-NEXT: lbu t5, 2(a1) ; RV64I-NEXT: lbu t6, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a0, a0, s0 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t3, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu s0, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a7, t4, a7 +; RV64I-NEXT: or t0, t6, t5 +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: lbu t5, 5(a1) +; RV64I-NEXT: lbu t6, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t4, t5, t4 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, s0 +; RV64I-NEXT: or a1, a1, t6 ; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: or a7, t2, t1 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: or a4, t1, a5 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or a5, t3, t2 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli t5, t5, 16 -; RV64I-NEXT: or a5, t5, a6 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a6, t0, a7 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a1, a1, t3 +; RV64I-NEXT: or a1, a1, t4 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: slli a6, a0, 32 -; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a7, a0, 32 +; RV64I-NEXT: slli t0, a1, 32 ; RV64I-NEXT: or a0, a4, 
a3 -; RV64I-NEXT: or a1, a1, a5 -; RV64I-NEXT: slli a1, a1, 5 +; RV64I-NEXT: or a1, a7, a5 +; RV64I-NEXT: or a3, t0, a6 +; RV64I-NEXT: slli a3, a3, 5 ; RV64I-NEXT: li a4, 64 -; RV64I-NEXT: or a3, a6, a7 -; RV64I-NEXT: bltu a1, a4, .LBB7_2 +; RV64I-NEXT: bltu a3, a4, .LBB7_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: subw a5, a1, a4 -; RV64I-NEXT: srl a5, a3, a5 -; RV64I-NEXT: bnez a1, .LBB7_3 +; RV64I-NEXT: subw a5, a3, a4 +; RV64I-NEXT: srl a5, a1, a5 +; RV64I-NEXT: bnez a3, .LBB7_3 ; RV64I-NEXT: j .LBB7_4 ; RV64I-NEXT: .LBB7_2: -; RV64I-NEXT: srl a5, a0, a1 -; RV64I-NEXT: negw a6, a1 -; RV64I-NEXT: sll a6, a3, a6 +; RV64I-NEXT: srl a5, a0, a3 +; RV64I-NEXT: negw a6, a3 +; RV64I-NEXT: sll a6, a1, a6 ; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: beqz a1, .LBB7_4 +; RV64I-NEXT: beqz a3, .LBB7_4 ; RV64I-NEXT: .LBB7_3: ; RV64I-NEXT: mv a0, a5 ; RV64I-NEXT: .LBB7_4: -; RV64I-NEXT: bltu a1, a4, .LBB7_6 +; RV64I-NEXT: bltu a3, a4, .LBB7_6 ; RV64I-NEXT: # %bb.5: ; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: j .LBB7_7 ; RV64I-NEXT: .LBB7_6: -; RV64I-NEXT: srl a1, a3, a1 +; RV64I-NEXT: srl a1, a1, a3 ; RV64I-NEXT: .LBB7_7: ; RV64I-NEXT: srli a3, a0, 32 ; RV64I-NEXT: srliw a4, a0, 16 @@ -1147,8 +1141,6 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: sb a3, 13(a2) ; RV64I-NEXT: sb t4, 14(a2) ; RV64I-NEXT: sb t5, 15(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32I-LABEL: lshr_16bytes_wordOff: @@ -1166,42 +1158,42 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: lbu t1, 6(a0) ; RV32I-NEXT: lbu t2, 7(a0) ; RV32I-NEXT: lbu t4, 8(a0) -; RV32I-NEXT: lbu t5, 9(a0) -; RV32I-NEXT: lbu t6, 10(a0) -; RV32I-NEXT: lbu s0, 11(a0) +; RV32I-NEXT: lbu t3, 9(a0) +; RV32I-NEXT: lbu t5, 10(a0) +; RV32I-NEXT: lbu t6, 11(a0) ; RV32I-NEXT: slli a7, a7, 8 ; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: slli s0, t3, 8 ; RV32I-NEXT: or t3, a7, a6 ; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: lbu a6, 12(a0) -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu t2, 14(a0) +; RV32I-NEXT: or a6, s0, t4 +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t2, 13(a0) +; RV32I-NEXT: lbu t4, 14(a0) ; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: or t5, s0, t6 -; RV32I-NEXT: or t6, a7, a6 -; RV32I-NEXT: lbu a6, 0(a1) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or t5, t6, t5 +; RV32I-NEXT: or t2, t2, a7 +; RV32I-NEXT: or t4, a0, t4 +; RV32I-NEXT: lbu a0, 0(a1) ; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: lbu s0, 2(a1) +; RV32I-NEXT: lbu t6, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or t2, a0, t2 ; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or s1, a7, a6 +; RV32I-NEXT: or s0, a7, a0 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or s0, a1, s0 +; RV32I-NEXT: or t6, a1, t6 ; RV32I-NEXT: li a7, 32 ; RV32I-NEXT: slli a1, a5, 8 ; RV32I-NEXT: slli a0, t0, 8 ; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli t2, t2, 16 -; RV32I-NEXT: slli s0, s0, 16 -; RV32I-NEXT: or a6, t5, t4 -; RV32I-NEXT: or t0, t2, t6 -; RV32I-NEXT: or a5, s0, s1 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: or a6, t5, a6 +; RV32I-NEXT: or t0, t4, t2 +; RV32I-NEXT: or a5, t6, s0 ; RV32I-NEXT: slli a5, a5, 5 ; RV32I-NEXT: srl t2, a6, a5 ; RV32I-NEXT: neg t5, a5 @@ -1352,8 +1344,6 @@ define void 
@lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: shl_16bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lbu a3, 0(a0) ; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) @@ -1368,60 +1358,60 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu t6, 11(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 13(a0) -; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: lbu t0, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or t1, t4, t3 -; RV64I-NEXT: or t2, t6, t5 -; RV64I-NEXT: lbu t3, 0(a1) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: or t2, t4, t3 +; RV64I-NEXT: or t3, t6, t5 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 0(a1) ; RV64I-NEXT: lbu t4, 1(a1) ; RV64I-NEXT: lbu t5, 2(a1) ; RV64I-NEXT: lbu t6, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a0, a0, s0 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t3, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu s0, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a7, t4, a7 +; RV64I-NEXT: or t0, t6, t5 +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: lbu t5, 5(a1) +; RV64I-NEXT: lbu t6, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t4, t5, t4 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, s0 +; RV64I-NEXT: or a1, a1, t6 ; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: or a7, t2, t1 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: or a4, t1, a5 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or a5, t3, t2 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli t5, t5, 16 -; RV64I-NEXT: or a5, t5, a6 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a6, t0, a7 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a1, a1, t3 +; RV64I-NEXT: or a1, a1, t4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a4, a4, a3 -; RV64I-NEXT: or a1, a1, a5 -; RV64I-NEXT: slli a3, a1, 3 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a3, a1, a6 +; RV64I-NEXT: slli a3, a3, 3 ; RV64I-NEXT: li a5, 64 -; RV64I-NEXT: or a0, a0, a7 ; RV64I-NEXT: bltu a3, a5, .LBB8_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: li a1, 0 @@ -1475,8 +1465,6 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb a3, 13(a2) ; RV64I-NEXT: sb t4, 14(a2) ; RV64I-NEXT: sb t5, 15(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_16bytes: @@ -1485,34 +1473,34 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) 
nounwind { ; RV32I-NEXT: lbu a4, 1(a0) ; RV32I-NEXT: lbu a5, 2(a0) ; RV32I-NEXT: lbu a6, 3(a0) -; RV32I-NEXT: lbu a7, 4(a0) -; RV32I-NEXT: lbu t0, 5(a0) -; RV32I-NEXT: lbu t1, 6(a0) -; RV32I-NEXT: lbu t2, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a7, 5(a0) +; RV32I-NEXT: lbu t0, 6(a0) +; RV32I-NEXT: lbu t1, 7(a0) ; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a5, t0, a7 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: or a7, t1, t0 ; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: lbu t0, 1(a1) ; RV32I-NEXT: lbu t1, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, a6 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t2, t0, a6 ; RV32I-NEXT: li a6, 64 ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a1, a1, t1 ; RV32I-NEXT: li t1, 32 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli t2, t0, 16 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or t0, a4, a3 -; RV32I-NEXT: or a4, t2, a5 -; RV32I-NEXT: or a5, a1, a7 +; RV32I-NEXT: or t0, a5, a3 +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: or a5, a1, t2 ; RV32I-NEXT: slli a5, a5, 3 ; RV32I-NEXT: neg t3, a5 ; RV32I-NEXT: srl t4, t0, t3 @@ -1533,8 +1521,7 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw s3, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu s3, 11(a0) +; RV32I-NEXT: lbu s0, 11(a0) ; RV32I-NEXT: lbu s1, 15(a0) ; RV32I-NEXT: sub a7, a6, a5 ; RV32I-NEXT: mv a3, a4 @@ -1542,11 +1529,11 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: # %bb.4: ; RV32I-NEXT: mv a3, t5 ; RV32I-NEXT: .LBB8_5: -; RV32I-NEXT: lbu s2, 9(a0) -; RV32I-NEXT: lbu t5, 10(a0) -; RV32I-NEXT: lbu s0, 13(a0) -; RV32I-NEXT: lbu t6, 14(a0) -; RV32I-NEXT: slli s3, s3, 8 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: lbu s3, 9(a0) +; RV32I-NEXT: lbu s2, 10(a0) +; RV32I-NEXT: lbu t6, 13(a0) +; RV32I-NEXT: lbu t5, 14(a0) ; RV32I-NEXT: slli s1, s1, 8 ; RV32I-NEXT: bltu a7, t1, .LBB8_7 ; RV32I-NEXT: # %bb.6: @@ -1557,20 +1544,20 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll s4, a4, s4 ; RV32I-NEXT: or s4, t4, s4 ; RV32I-NEXT: .LBB8_8: -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: lbu s6, 8(a0) -; RV32I-NEXT: lbu s5, 12(a0) -; RV32I-NEXT: or s3, s3, t5 -; RV32I-NEXT: slli t5, s0, 8 -; RV32I-NEXT: or s1, s1, t6 +; RV32I-NEXT: slli s3, s3, 8 +; RV32I-NEXT: or s0, s0, s2 +; RV32I-NEXT: lbu s5, 8(a0) +; RV32I-NEXT: lbu s2, 12(a0) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: or s1, s1, t5 ; RV32I-NEXT: mv t4, t0 ; RV32I-NEXT: beqz a7, .LBB8_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv t4, s4 ; RV32I-NEXT: .LBB8_10: -; RV32I-NEXT: or a0, s2, s6 -; RV32I-NEXT: slli s0, s3, 16 -; RV32I-NEXT: or t6, t5, s5 +; RV32I-NEXT: or a0, s3, s5 +; RV32I-NEXT: slli s0, s0, 16 +; RV32I-NEXT: or t6, t6, s2 ; RV32I-NEXT: slli s1, s1, 16 ; RV32I-NEXT: bltu a7, t1, .LBB8_12 ; RV32I-NEXT: # %bb.11: @@ -1619,7 +1606,6 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lw s3, 16(sp) # 4-byte 
Folded Reload ; RV32I-NEXT: lw s4, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s5, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: bltu a5, a6, .LBB8_24 ; RV32I-NEXT: # %bb.23: @@ -1681,8 +1667,6 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: shl_16bytes_wordOff: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lbu a3, 0(a0) ; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) @@ -1697,60 +1681,60 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: lbu t6, 11(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 13(a0) -; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: lbu t0, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or t1, t4, t3 -; RV64I-NEXT: or t2, t6, t5 -; RV64I-NEXT: lbu t3, 0(a1) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: or t2, t4, t3 +; RV64I-NEXT: or t3, t6, t5 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 0(a1) ; RV64I-NEXT: lbu t4, 1(a1) ; RV64I-NEXT: lbu t5, 2(a1) ; RV64I-NEXT: lbu t6, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a0, a0, s0 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t3, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu s0, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a7, t4, a7 +; RV64I-NEXT: or t0, t6, t5 +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: lbu t5, 5(a1) +; RV64I-NEXT: lbu t6, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t4, t5, t4 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, s0 +; RV64I-NEXT: or a1, a1, t6 ; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: or a7, t2, t1 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: or a4, t1, a5 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or a5, t3, t2 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli t5, t5, 16 -; RV64I-NEXT: or a5, t5, a6 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a6, t0, a7 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a1, a1, t3 +; RV64I-NEXT: or a1, a1, t4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a4, a4, a3 -; RV64I-NEXT: or a1, a1, a5 -; RV64I-NEXT: slli a3, a1, 5 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a3, a1, a6 +; RV64I-NEXT: slli a3, a3, 5 ; RV64I-NEXT: li a5, 64 -; RV64I-NEXT: or a0, a0, a7 ; RV64I-NEXT: bltu a3, a5, .LBB9_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: li a1, 0 @@ -1804,8 +1788,6 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; 
RV64I-NEXT: sb a3, 13(a2) ; RV64I-NEXT: sb t4, 14(a2) ; RV64I-NEXT: sb t5, 15(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32I-LABEL: shl_16bytes_wordOff: @@ -1814,34 +1796,34 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: lbu a4, 1(a0) ; RV32I-NEXT: lbu a5, 2(a0) ; RV32I-NEXT: lbu a6, 3(a0) -; RV32I-NEXT: lbu a7, 4(a0) -; RV32I-NEXT: lbu t0, 5(a0) -; RV32I-NEXT: lbu t1, 6(a0) -; RV32I-NEXT: lbu t2, 7(a0) ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a7, 5(a0) +; RV32I-NEXT: lbu t0, 6(a0) +; RV32I-NEXT: lbu t1, 7(a0) ; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a4, a6, a5 -; RV32I-NEXT: or a5, t0, a7 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: or a7, t1, t0 ; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: lbu t0, 1(a1) ; RV32I-NEXT: lbu t1, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a7, a7, a6 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or t2, t0, a6 ; RV32I-NEXT: li a6, 64 ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a1, a1, t1 ; RV32I-NEXT: li t1, 32 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli t2, t0, 16 +; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 16 -; RV32I-NEXT: or t0, a4, a3 -; RV32I-NEXT: or a4, t2, a5 -; RV32I-NEXT: or a5, a1, a7 +; RV32I-NEXT: or t0, a5, a3 +; RV32I-NEXT: or a4, a7, a4 +; RV32I-NEXT: or a5, a1, t2 ; RV32I-NEXT: slli a5, a5, 5 ; RV32I-NEXT: neg t3, a5 ; RV32I-NEXT: srl t4, t0, t3 @@ -1862,8 +1844,7 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: sw s3, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s5, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: sw s6, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu s3, 11(a0) +; RV32I-NEXT: lbu s0, 11(a0) ; RV32I-NEXT: lbu s1, 15(a0) ; RV32I-NEXT: sub a7, a6, a5 ; RV32I-NEXT: mv a3, a4 @@ -1871,11 +1852,11 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: # %bb.4: ; RV32I-NEXT: mv a3, t5 ; RV32I-NEXT: .LBB9_5: -; RV32I-NEXT: lbu s2, 9(a0) -; RV32I-NEXT: lbu t5, 10(a0) -; RV32I-NEXT: lbu s0, 13(a0) -; RV32I-NEXT: lbu t6, 14(a0) -; RV32I-NEXT: slli s3, s3, 8 +; RV32I-NEXT: slli s0, s0, 8 +; RV32I-NEXT: lbu s3, 9(a0) +; RV32I-NEXT: lbu s2, 10(a0) +; RV32I-NEXT: lbu t6, 13(a0) +; RV32I-NEXT: lbu t5, 14(a0) ; RV32I-NEXT: slli s1, s1, 8 ; RV32I-NEXT: bltu a7, t1, .LBB9_7 ; RV32I-NEXT: # %bb.6: @@ -1886,20 +1867,20 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: sll s4, a4, s4 ; RV32I-NEXT: or s4, t4, s4 ; RV32I-NEXT: .LBB9_8: -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: lbu s6, 8(a0) -; RV32I-NEXT: lbu s5, 12(a0) -; RV32I-NEXT: or s3, s3, t5 -; RV32I-NEXT: slli t5, s0, 8 -; RV32I-NEXT: or s1, s1, t6 +; RV32I-NEXT: slli s3, s3, 8 +; RV32I-NEXT: or s0, s0, s2 +; RV32I-NEXT: lbu s5, 8(a0) +; RV32I-NEXT: lbu s2, 12(a0) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: or s1, s1, t5 ; RV32I-NEXT: mv t4, t0 ; RV32I-NEXT: beqz a7, .LBB9_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv t4, s4 ; RV32I-NEXT: .LBB9_10: -; RV32I-NEXT: or a0, s2, s6 -; RV32I-NEXT: slli s0, s3, 16 -; RV32I-NEXT: or t6, t5, 
s5 +; RV32I-NEXT: or a0, s3, s5 +; RV32I-NEXT: slli s0, s0, 16 +; RV32I-NEXT: or t6, t6, s2 ; RV32I-NEXT: slli s1, s1, 16 ; RV32I-NEXT: bltu a7, t1, .LBB9_12 ; RV32I-NEXT: # %bb.11: @@ -1948,7 +1929,6 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: lw s3, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s4, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s5, 8(sp) # 4-byte Folded Reload -; RV32I-NEXT: lw s6, 4(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: bltu a5, a6, .LBB9_24 ; RV32I-NEXT: # %bb.23: @@ -2011,8 +1991,6 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_16bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lbu a3, 0(a0) ; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) @@ -2027,81 +2005,81 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu t6, 11(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 13(a0) -; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: lbu t0, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or t1, t4, t3 -; RV64I-NEXT: or t2, t6, t5 -; RV64I-NEXT: lbu t3, 0(a1) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: or t2, t4, t3 +; RV64I-NEXT: or t3, t6, t5 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 0(a1) ; RV64I-NEXT: lbu t4, 1(a1) ; RV64I-NEXT: lbu t5, 2(a1) ; RV64I-NEXT: lbu t6, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a0, a0, s0 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t3, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu s0, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a7, t4, a7 +; RV64I-NEXT: or t0, t6, t5 +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: lbu t5, 5(a1) +; RV64I-NEXT: lbu t6, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t4, t5, t4 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, s0 +; RV64I-NEXT: or a1, a1, t6 ; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: or a7, t2, t1 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: or a4, t1, a5 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or a5, t3, t2 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli t5, t5, 16 -; RV64I-NEXT: or a5, t5, a6 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a6, t0, a7 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a1, a1, t3 +; RV64I-NEXT: or a1, a1, t4 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: slli a6, a0, 32 -; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a7, a0, 32 +; RV64I-NEXT: slli t0, a1, 32 ; RV64I-NEXT: or a0, a4, a3 -; 
RV64I-NEXT: or a1, a1, a5 -; RV64I-NEXT: slli a1, a1, 3 +; RV64I-NEXT: or a1, a7, a5 +; RV64I-NEXT: or a3, t0, a6 +; RV64I-NEXT: slli a3, a3, 3 ; RV64I-NEXT: li a4, 64 -; RV64I-NEXT: or a3, a6, a7 -; RV64I-NEXT: bltu a1, a4, .LBB10_2 +; RV64I-NEXT: bltu a3, a4, .LBB10_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: subw a5, a1, a4 -; RV64I-NEXT: sra a5, a3, a5 -; RV64I-NEXT: bnez a1, .LBB10_3 +; RV64I-NEXT: subw a5, a3, a4 +; RV64I-NEXT: sra a5, a1, a5 +; RV64I-NEXT: bnez a3, .LBB10_3 ; RV64I-NEXT: j .LBB10_4 ; RV64I-NEXT: .LBB10_2: -; RV64I-NEXT: srl a5, a0, a1 -; RV64I-NEXT: negw a6, a1 -; RV64I-NEXT: sll a6, a3, a6 +; RV64I-NEXT: srl a5, a0, a3 +; RV64I-NEXT: negw a6, a3 +; RV64I-NEXT: sll a6, a1, a6 ; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: beqz a1, .LBB10_4 +; RV64I-NEXT: beqz a3, .LBB10_4 ; RV64I-NEXT: .LBB10_3: ; RV64I-NEXT: mv a0, a5 ; RV64I-NEXT: .LBB10_4: -; RV64I-NEXT: bltu a1, a4, .LBB10_6 +; RV64I-NEXT: bltu a3, a4, .LBB10_6 ; RV64I-NEXT: # %bb.5: -; RV64I-NEXT: srai a1, a3, 63 +; RV64I-NEXT: srai a1, a1, 63 ; RV64I-NEXT: j .LBB10_7 ; RV64I-NEXT: .LBB10_6: -; RV64I-NEXT: sra a1, a3, a1 +; RV64I-NEXT: sra a1, a1, a3 ; RV64I-NEXT: .LBB10_7: ; RV64I-NEXT: srli a3, a0, 32 ; RV64I-NEXT: srliw a4, a0, 16 @@ -2139,8 +2117,6 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb a3, 13(a2) ; RV64I-NEXT: sb t4, 14(a2) ; RV64I-NEXT: sb t5, 15(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_16bytes: @@ -2158,42 +2134,42 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu t0, 6(a0) ; RV32I-NEXT: lbu t1, 7(a0) ; RV32I-NEXT: lbu t4, 8(a0) -; RV32I-NEXT: lbu t5, 9(a0) -; RV32I-NEXT: lbu t6, 10(a0) -; RV32I-NEXT: lbu s0, 11(a0) +; RV32I-NEXT: lbu t3, 9(a0) +; RV32I-NEXT: lbu t5, 10(a0) +; RV32I-NEXT: lbu t6, 11(a0) ; RV32I-NEXT: slli a7, a7, 8 ; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli s0, t3, 8 ; RV32I-NEXT: or t3, a7, a6 ; RV32I-NEXT: or t1, t1, t0 -; RV32I-NEXT: lbu a6, 12(a0) -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu t0, 14(a0) +; RV32I-NEXT: or a6, s0, t4 +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: lbu t4, 14(a0) ; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: or t5, s0, t6 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t6, 1(a1) -; RV32I-NEXT: lbu s0, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or s1, a0, t0 ; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: or t6, t6, a7 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or t5, t6, t5 +; RV32I-NEXT: or t6, t0, a7 +; RV32I-NEXT: or a7, a0, t4 +; RV32I-NEXT: lbu a0, 0(a1) +; RV32I-NEXT: lbu t0, 1(a1) +; RV32I-NEXT: lbu t4, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or s0, t0, a0 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or s0, a1, s0 +; RV32I-NEXT: or t4, a1, t4 ; RV32I-NEXT: li t0, 32 ; RV32I-NEXT: slli a1, a5, 8 ; RV32I-NEXT: slli a0, t2, 8 ; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli s1, s1, 16 -; RV32I-NEXT: slli s0, s0, 16 -; RV32I-NEXT: or a7, t5, t4 -; RV32I-NEXT: or a5, s1, a6 -; RV32I-NEXT: or a6, s0, t6 +; RV32I-NEXT: slli a5, a7, 16 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: or a7, t5, a6 +; RV32I-NEXT: or a5, a5, t6 +; RV32I-NEXT: or a6, t4, s0 ; RV32I-NEXT: slli a6, a6, 3 ; 
RV32I-NEXT: srl t2, a7, a6 ; RV32I-NEXT: neg t6, a6 @@ -2344,8 +2320,6 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_16bytes_wordOff: ; RV64I: # %bb.0: -; RV64I-NEXT: addi sp, sp, -16 -; RV64I-NEXT: sd s0, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lbu a3, 0(a0) ; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) @@ -2360,81 +2334,81 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: lbu t6, 11(a0) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 +; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, a6, a5 -; RV64I-NEXT: lbu a5, 12(a0) -; RV64I-NEXT: lbu a6, 13(a0) -; RV64I-NEXT: lbu s0, 14(a0) +; RV64I-NEXT: or a5, t0, a7 +; RV64I-NEXT: lbu a6, 12(a0) +; RV64I-NEXT: lbu a7, 13(a0) +; RV64I-NEXT: lbu t0, 14(a0) ; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t0, t0, 8 ; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: or t0, t2, t1 -; RV64I-NEXT: or t1, t4, t3 -; RV64I-NEXT: or t2, t6, t5 -; RV64I-NEXT: lbu t3, 0(a1) +; RV64I-NEXT: slli a7, a7, 8 +; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: or t2, t4, t3 +; RV64I-NEXT: or t3, t6, t5 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: lbu a7, 0(a1) ; RV64I-NEXT: lbu t4, 1(a1) ; RV64I-NEXT: lbu t5, 2(a1) ; RV64I-NEXT: lbu t6, 3(a1) -; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli a0, a0, 8 ; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: or a0, a0, s0 -; RV64I-NEXT: or a6, t4, t3 -; RV64I-NEXT: lbu t3, 4(a1) -; RV64I-NEXT: lbu t4, 5(a1) -; RV64I-NEXT: lbu s0, 6(a1) -; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: slli t4, t4, 8 -; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a7, t4, a7 +; RV64I-NEXT: or t0, t6, t5 +; RV64I-NEXT: lbu t4, 4(a1) +; RV64I-NEXT: lbu t5, 5(a1) +; RV64I-NEXT: lbu t6, 6(a1) +; RV64I-NEXT: lbu a1, 7(a1) +; RV64I-NEXT: slli t5, t5, 8 +; RV64I-NEXT: or t4, t5, t4 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or a1, a1, s0 +; RV64I-NEXT: or a1, a1, t6 ; RV64I-NEXT: slli a4, a4, 16 ; RV64I-NEXT: or a3, a4, a3 -; RV64I-NEXT: slli t0, t0, 16 -; RV64I-NEXT: or a4, t0, a7 -; RV64I-NEXT: slli t2, t2, 16 -; RV64I-NEXT: or a7, t2, t1 +; RV64I-NEXT: slli t1, t1, 16 +; RV64I-NEXT: or a4, t1, a5 +; RV64I-NEXT: slli t3, t3, 16 +; RV64I-NEXT: or a5, t3, t2 ; RV64I-NEXT: slli a0, a0, 16 -; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: slli t5, t5, 16 -; RV64I-NEXT: or a5, t5, a6 +; RV64I-NEXT: or a0, a0, a6 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a6, t0, a7 ; RV64I-NEXT: slli a1, a1, 16 -; RV64I-NEXT: or a1, a1, t3 +; RV64I-NEXT: or a1, a1, t4 ; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: slli a6, a0, 32 -; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a7, a0, 32 +; RV64I-NEXT: slli t0, a1, 32 ; RV64I-NEXT: or a0, a4, a3 -; RV64I-NEXT: or a1, a1, a5 -; RV64I-NEXT: slli a1, a1, 5 +; RV64I-NEXT: or a1, a7, a5 +; RV64I-NEXT: or a3, t0, a6 +; RV64I-NEXT: slli a3, a3, 5 ; RV64I-NEXT: li a4, 64 -; RV64I-NEXT: or a3, a6, a7 -; RV64I-NEXT: bltu a1, a4, .LBB11_2 +; RV64I-NEXT: bltu a3, a4, .LBB11_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: subw a5, a1, a4 -; RV64I-NEXT: sra a5, a3, a5 -; RV64I-NEXT: bnez a1, .LBB11_3 +; RV64I-NEXT: subw a5, a3, a4 +; RV64I-NEXT: sra a5, a1, a5 +; RV64I-NEXT: bnez a3, .LBB11_3 ; RV64I-NEXT: j .LBB11_4 ; RV64I-NEXT: 
.LBB11_2: -; RV64I-NEXT: srl a5, a0, a1 -; RV64I-NEXT: negw a6, a1 -; RV64I-NEXT: sll a6, a3, a6 +; RV64I-NEXT: srl a5, a0, a3 +; RV64I-NEXT: negw a6, a3 +; RV64I-NEXT: sll a6, a1, a6 ; RV64I-NEXT: or a5, a5, a6 -; RV64I-NEXT: beqz a1, .LBB11_4 +; RV64I-NEXT: beqz a3, .LBB11_4 ; RV64I-NEXT: .LBB11_3: ; RV64I-NEXT: mv a0, a5 ; RV64I-NEXT: .LBB11_4: -; RV64I-NEXT: bltu a1, a4, .LBB11_6 +; RV64I-NEXT: bltu a3, a4, .LBB11_6 ; RV64I-NEXT: # %bb.5: -; RV64I-NEXT: srai a1, a3, 63 +; RV64I-NEXT: srai a1, a1, 63 ; RV64I-NEXT: j .LBB11_7 ; RV64I-NEXT: .LBB11_6: -; RV64I-NEXT: sra a1, a3, a1 +; RV64I-NEXT: sra a1, a1, a3 ; RV64I-NEXT: .LBB11_7: ; RV64I-NEXT: srli a3, a0, 32 ; RV64I-NEXT: srliw a4, a0, 16 @@ -2472,8 +2446,6 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: sb a3, 13(a2) ; RV64I-NEXT: sb t4, 14(a2) ; RV64I-NEXT: sb t5, 15(a2) -; RV64I-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret ; ; RV32I-LABEL: ashr_16bytes_wordOff: @@ -2491,42 +2463,42 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: lbu t0, 6(a0) ; RV32I-NEXT: lbu t1, 7(a0) ; RV32I-NEXT: lbu t4, 8(a0) -; RV32I-NEXT: lbu t5, 9(a0) -; RV32I-NEXT: lbu t6, 10(a0) -; RV32I-NEXT: lbu s0, 11(a0) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t3, a7, a6 -; RV32I-NEXT: or t1, t1, t0 -; RV32I-NEXT: lbu a6, 12(a0) -; RV32I-NEXT: lbu a7, 13(a0) -; RV32I-NEXT: lbu t0, 14(a0) -; RV32I-NEXT: lbu a0, 15(a0) -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: slli s0, s0, 8 -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: or t5, s0, t6 -; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t6, 1(a1) -; RV32I-NEXT: lbu s0, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a0, a0, 8 -; RV32I-NEXT: or s1, a0, t0 +; RV32I-NEXT: lbu t3, 9(a0) +; RV32I-NEXT: lbu t5, 10(a0) +; RV32I-NEXT: lbu t6, 11(a0) +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli s0, t3, 8 +; RV32I-NEXT: or t3, a7, a6 +; RV32I-NEXT: or t1, t1, t0 +; RV32I-NEXT: or a6, s0, t4 +; RV32I-NEXT: lbu a7, 12(a0) +; RV32I-NEXT: lbu t0, 13(a0) +; RV32I-NEXT: lbu t4, 14(a0) +; RV32I-NEXT: lbu a0, 15(a0) ; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: or t6, t6, a7 +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or t5, t6, t5 +; RV32I-NEXT: or t6, t0, a7 +; RV32I-NEXT: or a7, a0, t4 +; RV32I-NEXT: lbu a0, 0(a1) +; RV32I-NEXT: lbu t0, 1(a1) +; RV32I-NEXT: lbu t4, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli t0, t0, 8 +; RV32I-NEXT: or s0, t0, a0 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or s0, a1, s0 +; RV32I-NEXT: or t4, a1, t4 ; RV32I-NEXT: li t0, 32 ; RV32I-NEXT: slli a1, a5, 8 ; RV32I-NEXT: slli a0, t2, 8 ; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli s1, s1, 16 -; RV32I-NEXT: slli s0, s0, 16 -; RV32I-NEXT: or a7, t5, t4 -; RV32I-NEXT: or a5, s1, a6 -; RV32I-NEXT: or a6, s0, t6 +; RV32I-NEXT: slli a5, a7, 16 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: or a7, t5, a6 +; RV32I-NEXT: or a5, a5, t6 +; RV32I-NEXT: or a6, t4, s0 ; RV32I-NEXT: slli a6, a6, 5 ; RV32I-NEXT: srl t2, a7, a6 ; RV32I-NEXT: neg t6, a6 @@ -2713,88 +2685,88 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: or a7, t0, a7 +; 
RV64I-NEXT: or t0, t2, t1 ; RV64I-NEXT: lbu s8, 20(a0) ; RV64I-NEXT: lbu s9, 21(a0) ; RV64I-NEXT: lbu s10, 22(a0) ; RV64I-NEXT: lbu s11, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: slli s3, s3, 8 ; RV64I-NEXT: or a4, t4, t3 ; RV64I-NEXT: or a6, t6, t5 -; RV64I-NEXT: or t0, s1, s0 -; RV64I-NEXT: lbu t5, 24(a0) -; RV64I-NEXT: lbu t6, 25(a0) -; RV64I-NEXT: lbu s0, 26(a0) -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: or t1, s1, s0 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: lbu t3, 24(a0) +; RV64I-NEXT: lbu t4, 25(a0) +; RV64I-NEXT: lbu t5, 26(a0) +; RV64I-NEXT: lbu t6, 27(a0) ; RV64I-NEXT: slli s5, s5, 8 ; RV64I-NEXT: slli s7, s7, 8 -; RV64I-NEXT: or t4, s3, s2 -; RV64I-NEXT: or t2, s5, s4 -; RV64I-NEXT: or t3, s7, s6 -; RV64I-NEXT: lbu s2, 28(a0) -; RV64I-NEXT: lbu s3, 29(a0) -; RV64I-NEXT: lbu s4, 30(a0) -; RV64I-NEXT: lbu a0, 31(a0) ; RV64I-NEXT: slli s9, s9, 8 +; RV64I-NEXT: or s0, s5, s4 +; RV64I-NEXT: or s1, s7, s6 +; RV64I-NEXT: or s2, s9, s8 +; RV64I-NEXT: lbu s3, 28(a0) +; RV64I-NEXT: lbu s4, 29(a0) +; RV64I-NEXT: lbu s5, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) ; RV64I-NEXT: slli s11, s11, 8 +; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s5, s9, s8 +; RV64I-NEXT: slli s4, s4, 8 ; RV64I-NEXT: or s6, s11, s10 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: or s0, s1, s0 +; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or t4, t6, t5 +; RV64I-NEXT: or t5, s4, s3 ; RV64I-NEXT: lbu t6, 0(a1) -; RV64I-NEXT: lbu s1, 1(a1) -; RV64I-NEXT: lbu s7, 2(a1) -; RV64I-NEXT: lbu s8, 3(a1) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: lbu s3, 1(a1) +; RV64I-NEXT: lbu s4, 2(a1) +; RV64I-NEXT: lbu s7, 3(a1) ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: or s3, a0, s4 -; RV64I-NEXT: or t6, s1, t6 +; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: slli s7, s7, 8 +; RV64I-NEXT: or s5, a0, s5 +; RV64I-NEXT: or t6, s3, t6 +; RV64I-NEXT: or s3, s7, s4 ; RV64I-NEXT: lbu a0, 4(a1) -; RV64I-NEXT: lbu s1, 5(a1) -; RV64I-NEXT: lbu s4, 6(a1) +; RV64I-NEXT: lbu s4, 5(a1) +; RV64I-NEXT: lbu s7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli s8, s8, 8 -; RV64I-NEXT: or s7, s8, s7 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s1, s1, a0 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: or s4, s4, a0 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or s4, a1, s4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or a1, t1, a7 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: or a0, t4, t0 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: or t0, t3, t2 +; RV64I-NEXT: or s7, a1, s7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a1, t0, a7 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: or a0, t2, t1 +; RV64I-NEXT: slli s1, s1, 16 +; RV64I-NEXT: or s0, s1, s0 ; RV64I-NEXT: slli s6, s6, 16 -; RV64I-NEXT: or t1, s6, s5 -; RV64I-NEXT: slli s0, s0, 16 -; RV64I-NEXT: or t3, s0, t5 +; RV64I-NEXT: or t0, s6, s2 +; RV64I-NEXT: slli t4, t4, 16 +; RV64I-NEXT: or t1, t4, t3 +; RV64I-NEXT: slli s5, s5, 16 +; RV64I-NEXT: or t3, s5, t5 ; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: or t5, s3, s2 +; RV64I-NEXT: or t5, s3, t6 ; RV64I-NEXT: slli s7, s7, 16 -; RV64I-NEXT: or t6, s7, t6 -; RV64I-NEXT: slli s4, s4, 16 -; RV64I-NEXT: or s0, s4, s1 +; RV64I-NEXT: or t6, s7, s4 ; RV64I-NEXT: li a7, 64 ; RV64I-NEXT: slli t4, a5, 16 ; RV64I-NEXT: slli t2, a6, 16 -; RV64I-NEXT: slli t1, t1, 32 -; RV64I-NEXT: slli t5, 
t5, 32 -; RV64I-NEXT: slli s0, s0, 32 -; RV64I-NEXT: or a6, t1, t0 -; RV64I-NEXT: or t0, t5, t3 -; RV64I-NEXT: or a5, s0, t6 +; RV64I-NEXT: slli t0, t0, 32 +; RV64I-NEXT: slli t3, t3, 32 +; RV64I-NEXT: slli t6, t6, 32 +; RV64I-NEXT: or a6, t0, s0 +; RV64I-NEXT: or t0, t3, t1 +; RV64I-NEXT: or a5, t6, t5 ; RV64I-NEXT: slli a5, a5, 3 ; RV64I-NEXT: subw t1, a5, a7 ; RV64I-NEXT: negw t5, a5 @@ -3008,49 +2980,49 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu t0, 21(a0) ; RV32I-NEXT: lbu t1, 22(a0) ; RV32I-NEXT: lbu t2, 23(a0) -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu t4, 25(a0) -; RV32I-NEXT: lbu t5, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) ; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 24(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: lbu t4, 26(a0) +; RV32I-NEXT: lbu t5, 27(a0) ; RV32I-NEXT: slli a6, a6, 8 ; RV32I-NEXT: slli t0, t0, 8 ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: slli t3, t3, 8 ; RV32I-NEXT: or a6, a6, a5 ; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a4, t2, t1 -; RV32I-NEXT: lbu a7, 28(a0) -; RV32I-NEXT: lbu t0, 29(a0) -; RV32I-NEXT: lbu t1, 30(a0) -; RV32I-NEXT: lbu t2, 31(a0) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t4, t4, t3 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 0(a1) +; RV32I-NEXT: or a7, t2, t1 +; RV32I-NEXT: or a4, t3, a4 +; RV32I-NEXT: lbu t0, 28(a0) +; RV32I-NEXT: lbu t1, 29(a0) +; RV32I-NEXT: lbu t2, 30(a0) +; RV32I-NEXT: lbu t3, 31(a0) +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: or t1, t3, t2 +; RV32I-NEXT: lbu t2, 0(a1) ; RV32I-NEXT: lbu t3, 1(a1) -; RV32I-NEXT: lbu t6, 2(a1) +; RV32I-NEXT: lbu t5, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t1, t2, t1 ; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: or t0, t3, t0 +; RV32I-NEXT: or t2, t3, t2 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or t2, a1, t6 +; RV32I-NEXT: or t5, a1, t5 ; RV32I-NEXT: li t3, 32 ; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a1, a4, 16 -; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli a4, t1, 16 -; RV32I-NEXT: slli t2, t2, 16 -; RV32I-NEXT: or t1, t5, t4 -; RV32I-NEXT: or t5, a4, a7 -; RV32I-NEXT: or a4, t2, t0 +; RV32I-NEXT: slli a1, a7, 16 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli a7, t1, 16 +; RV32I-NEXT: slli t6, t5, 16 +; RV32I-NEXT: or t1, t4, a4 +; RV32I-NEXT: or t5, a7, t0 +; RV32I-NEXT: or a4, t6, t2 ; RV32I-NEXT: slli a4, a4, 3 -; RV32I-NEXT: srl s0, t1, a4 +; RV32I-NEXT: srl s1, t1, a4 ; RV32I-NEXT: neg s6, a4 ; RV32I-NEXT: sll t4, t5, s6 ; RV32I-NEXT: bltu a4, t3, .LBB12_2 @@ -3058,7 +3030,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: srl a7, t5, a4 ; RV32I-NEXT: j .LBB12_3 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: or a7, s0, t4 +; RV32I-NEXT: or a7, s1, t4 ; RV32I-NEXT: .LBB12_3: ; RV32I-NEXT: or t0, a6, a3 ; RV32I-NEXT: or a6, a1, a5 @@ -3072,11 +3044,11 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw a3, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu a4, t3, .LBB12_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: li ra, 0 +; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: srl a3, a6, a4 ; RV32I-NEXT: j .LBB12_8 ; RV32I-NEXT: .LBB12_7: -; RV32I-NEXT: srl ra, t5, a4 +; RV32I-NEXT: srl s0, t5, a4 ; RV32I-NEXT: or 
a3, a3, a5 ; RV32I-NEXT: .LBB12_8: ; RV32I-NEXT: li t6, 64 @@ -3113,29 +3085,29 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: mv t4, a3 ; RV32I-NEXT: .LBB12_18: ; RV32I-NEXT: neg s11, s9 -; RV32I-NEXT: sw s0, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s9, t3, .LBB12_20 ; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: srl s2, t5, s9 +; RV32I-NEXT: srl s1, t5, s9 ; RV32I-NEXT: j .LBB12_21 ; RV32I-NEXT: .LBB12_20: ; RV32I-NEXT: sll a3, t5, s11 -; RV32I-NEXT: or s2, s0, a3 +; RV32I-NEXT: or s1, s1, a3 ; RV32I-NEXT: .LBB12_21: -; RV32I-NEXT: lbu s1, 11(a0) +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu s2, 11(a0) ; RV32I-NEXT: lbu a3, 15(a0) ; RV32I-NEXT: mv s0, t1 ; RV32I-NEXT: beqz s9, .LBB12_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: mv s0, s2 +; RV32I-NEXT: mv s0, s1 ; RV32I-NEXT: .LBB12_23: -; RV32I-NEXT: lbu s4, 9(a0) -; RV32I-NEXT: lbu s2, 10(a0) -; RV32I-NEXT: lbu s5, 13(a0) -; RV32I-NEXT: lbu s8, 14(a0) -; RV32I-NEXT: slli s3, s1, 8 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: lbu s3, 9(a0) +; RV32I-NEXT: lbu s4, 10(a0) +; RV32I-NEXT: lbu s8, 13(a0) +; RV32I-NEXT: lbu ra, 14(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: sw ra, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s9, t3, .LBB12_25 ; RV32I-NEXT: # %bb.24: ; RV32I-NEXT: li s1, 0 @@ -3143,12 +3115,12 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB12_25: ; RV32I-NEXT: srl s1, t5, a4 ; RV32I-NEXT: .LBB12_26: -; RV32I-NEXT: or s2, s3, s2 -; RV32I-NEXT: lbu ra, 8(a0) -; RV32I-NEXT: lbu s3, 12(a0) -; RV32I-NEXT: slli s4, s4, 8 -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: or s8, a3, s8 +; RV32I-NEXT: slli s3, s3, 8 +; RV32I-NEXT: or s2, s2, s4 +; RV32I-NEXT: lbu s5, 8(a0) +; RV32I-NEXT: lbu s4, 12(a0) +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or ra, a3, ra ; RV32I-NEXT: bgeu a4, t6, .LBB12_28 ; RV32I-NEXT: # %bb.27: ; RV32I-NEXT: or s0, a7, t2 @@ -3156,10 +3128,10 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB12_28: ; RV32I-NEXT: lbu a3, 3(a0) ; RV32I-NEXT: lbu t2, 7(a0) -; RV32I-NEXT: or a5, s4, ra +; RV32I-NEXT: or a5, s3, s5 ; RV32I-NEXT: slli t4, s2, 16 -; RV32I-NEXT: or s2, s5, s3 -; RV32I-NEXT: slli s3, s8, 16 +; RV32I-NEXT: or s2, s8, s4 +; RV32I-NEXT: slli ra, ra, 16 ; RV32I-NEXT: mv s4, t0 ; RV32I-NEXT: mv a7, a6 ; RV32I-NEXT: beqz a4, .LBB12_30 @@ -3167,25 +3139,25 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: mv s4, s0 ; RV32I-NEXT: mv a7, s1 ; RV32I-NEXT: .LBB12_30: -; RV32I-NEXT: slli s5, a3, 8 -; RV32I-NEXT: lbu ra, 1(a0) +; RV32I-NEXT: slli s3, a3, 8 +; RV32I-NEXT: lbu s8, 1(a0) ; RV32I-NEXT: lbu a3, 2(a0) ; RV32I-NEXT: lbu s1, 5(a0) ; RV32I-NEXT: lbu s0, 6(a0) -; RV32I-NEXT: slli s8, t2, 8 +; RV32I-NEXT: slli s5, t2, 8 ; RV32I-NEXT: or t4, t4, a5 -; RV32I-NEXT: or t2, s3, s2 +; RV32I-NEXT: or t2, ra, s2 ; RV32I-NEXT: bltu a4, t6, .LBB12_32 ; RV32I-NEXT: # %bb.31: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: .LBB12_32: -; RV32I-NEXT: slli s3, ra, 8 -; RV32I-NEXT: or a5, s5, a3 -; RV32I-NEXT: lbu s5, 0(a0) +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or a5, s3, a3 +; RV32I-NEXT: lbu s3, 0(a0) ; RV32I-NEXT: lbu a0, 4(a0) ; RV32I-NEXT: slli s1, s1, 8 -; RV32I-NEXT: or a3, s8, s0 +; RV32I-NEXT: or a3, s5, s0 ; RV32I-NEXT: srl s2, t4, a4 ; RV32I-NEXT: sll ra, t2, s6 ; RV32I-NEXT: bltu a4, t3, .LBB12_34 @@ 
-3195,7 +3167,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB12_34: ; RV32I-NEXT: or s0, s2, ra ; RV32I-NEXT: .LBB12_35: -; RV32I-NEXT: or s3, s3, s5 +; RV32I-NEXT: or s3, s8, s3 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: slli a3, a3, 16 @@ -3637,88 +3609,88 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: or t0, t2, t1 ; RV64I-NEXT: lbu s8, 20(a0) ; RV64I-NEXT: lbu s9, 21(a0) ; RV64I-NEXT: lbu s10, 22(a0) ; RV64I-NEXT: lbu s11, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: slli s3, s3, 8 ; RV64I-NEXT: or a4, t4, t3 ; RV64I-NEXT: or a6, t6, t5 -; RV64I-NEXT: or t0, s1, s0 -; RV64I-NEXT: lbu t5, 24(a0) -; RV64I-NEXT: lbu t6, 25(a0) -; RV64I-NEXT: lbu s0, 26(a0) -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: or t1, s1, s0 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: lbu t3, 24(a0) +; RV64I-NEXT: lbu t4, 25(a0) +; RV64I-NEXT: lbu t5, 26(a0) +; RV64I-NEXT: lbu t6, 27(a0) ; RV64I-NEXT: slli s5, s5, 8 ; RV64I-NEXT: slli s7, s7, 8 -; RV64I-NEXT: or t4, s3, s2 -; RV64I-NEXT: or t2, s5, s4 -; RV64I-NEXT: or t3, s7, s6 -; RV64I-NEXT: lbu s2, 28(a0) -; RV64I-NEXT: lbu s3, 29(a0) -; RV64I-NEXT: lbu s4, 30(a0) -; RV64I-NEXT: lbu a0, 31(a0) ; RV64I-NEXT: slli s9, s9, 8 +; RV64I-NEXT: or s0, s5, s4 +; RV64I-NEXT: or s1, s7, s6 +; RV64I-NEXT: or s2, s9, s8 +; RV64I-NEXT: lbu s3, 28(a0) +; RV64I-NEXT: lbu s4, 29(a0) +; RV64I-NEXT: lbu s5, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) ; RV64I-NEXT: slli s11, s11, 8 +; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s5, s9, s8 +; RV64I-NEXT: slli s4, s4, 8 ; RV64I-NEXT: or s6, s11, s10 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: or s0, s1, s0 +; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or t4, t6, t5 +; RV64I-NEXT: or t5, s4, s3 ; RV64I-NEXT: lbu t6, 0(a1) -; RV64I-NEXT: lbu s1, 1(a1) -; RV64I-NEXT: lbu s7, 2(a1) -; RV64I-NEXT: lbu s8, 3(a1) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: lbu s3, 1(a1) +; RV64I-NEXT: lbu s4, 2(a1) +; RV64I-NEXT: lbu s7, 3(a1) ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: or s3, a0, s4 -; RV64I-NEXT: or t6, s1, t6 +; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: slli s7, s7, 8 +; RV64I-NEXT: or s5, a0, s5 +; RV64I-NEXT: or t6, s3, t6 +; RV64I-NEXT: or s3, s7, s4 ; RV64I-NEXT: lbu a0, 4(a1) -; RV64I-NEXT: lbu s1, 5(a1) -; RV64I-NEXT: lbu s4, 6(a1) +; RV64I-NEXT: lbu s4, 5(a1) +; RV64I-NEXT: lbu s7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli s8, s8, 8 -; RV64I-NEXT: or s7, s8, s7 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s1, s1, a0 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: or s4, s4, a0 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or s4, a1, s4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or a1, t1, a7 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: or a0, t4, t0 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: or t0, t3, t2 +; RV64I-NEXT: or s7, a1, s7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a1, t0, a7 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: or a0, t2, t1 +; RV64I-NEXT: slli s1, s1, 16 +; RV64I-NEXT: or s0, s1, s0 ; RV64I-NEXT: 
slli s6, s6, 16 -; RV64I-NEXT: or t1, s6, s5 -; RV64I-NEXT: slli s0, s0, 16 -; RV64I-NEXT: or t3, s0, t5 +; RV64I-NEXT: or t0, s6, s2 +; RV64I-NEXT: slli t4, t4, 16 +; RV64I-NEXT: or t1, t4, t3 +; RV64I-NEXT: slli s5, s5, 16 +; RV64I-NEXT: or t3, s5, t5 ; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: or t5, s3, s2 +; RV64I-NEXT: or t5, s3, t6 ; RV64I-NEXT: slli s7, s7, 16 -; RV64I-NEXT: or t6, s7, t6 -; RV64I-NEXT: slli s4, s4, 16 -; RV64I-NEXT: or s0, s4, s1 +; RV64I-NEXT: or t6, s7, s4 ; RV64I-NEXT: li a7, 64 ; RV64I-NEXT: slli t4, a5, 16 ; RV64I-NEXT: slli t2, a6, 16 -; RV64I-NEXT: slli t1, t1, 32 -; RV64I-NEXT: slli t5, t5, 32 -; RV64I-NEXT: slli s0, s0, 32 -; RV64I-NEXT: or a6, t1, t0 -; RV64I-NEXT: or t0, t5, t3 -; RV64I-NEXT: or a5, s0, t6 +; RV64I-NEXT: slli t0, t0, 32 +; RV64I-NEXT: slli t3, t3, 32 +; RV64I-NEXT: slli t6, t6, 32 +; RV64I-NEXT: or a6, t0, s0 +; RV64I-NEXT: or t0, t3, t1 +; RV64I-NEXT: or a5, t6, t5 ; RV64I-NEXT: slli a5, a5, 5 ; RV64I-NEXT: subw t1, a5, a7 ; RV64I-NEXT: negw t5, a5 @@ -3932,49 +3904,49 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: lbu t0, 21(a0) ; RV32I-NEXT: lbu t1, 22(a0) ; RV32I-NEXT: lbu t2, 23(a0) -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu t4, 25(a0) -; RV32I-NEXT: lbu t5, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) ; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 24(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: lbu t4, 26(a0) +; RV32I-NEXT: lbu t5, 27(a0) ; RV32I-NEXT: slli a6, a6, 8 ; RV32I-NEXT: slli t0, t0, 8 ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: slli t3, t3, 8 ; RV32I-NEXT: or a6, a6, a5 ; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a4, t2, t1 -; RV32I-NEXT: lbu a7, 28(a0) -; RV32I-NEXT: lbu t0, 29(a0) -; RV32I-NEXT: lbu t1, 30(a0) -; RV32I-NEXT: lbu t2, 31(a0) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t4, t4, t3 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 0(a1) +; RV32I-NEXT: or a7, t2, t1 +; RV32I-NEXT: or a4, t3, a4 +; RV32I-NEXT: lbu t0, 28(a0) +; RV32I-NEXT: lbu t1, 29(a0) +; RV32I-NEXT: lbu t2, 30(a0) +; RV32I-NEXT: lbu t3, 31(a0) +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: or t1, t3, t2 +; RV32I-NEXT: lbu t2, 0(a1) ; RV32I-NEXT: lbu t3, 1(a1) -; RV32I-NEXT: lbu t6, 2(a1) +; RV32I-NEXT: lbu t5, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t1, t2, t1 ; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: or t0, t3, t0 +; RV32I-NEXT: or t2, t3, t2 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or t2, a1, t6 +; RV32I-NEXT: or t5, a1, t5 ; RV32I-NEXT: li t3, 32 ; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a1, a4, 16 -; RV32I-NEXT: slli t5, t5, 16 -; RV32I-NEXT: slli a4, t1, 16 -; RV32I-NEXT: slli t2, t2, 16 -; RV32I-NEXT: or t1, t5, t4 -; RV32I-NEXT: or t5, a4, a7 -; RV32I-NEXT: or a4, t2, t0 +; RV32I-NEXT: slli a1, a7, 16 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli a7, t1, 16 +; RV32I-NEXT: slli t6, t5, 16 +; RV32I-NEXT: or t1, t4, a4 +; RV32I-NEXT: or t5, a7, t0 +; RV32I-NEXT: or a4, t6, t2 ; RV32I-NEXT: slli a4, a4, 5 -; RV32I-NEXT: srl s0, t1, a4 +; RV32I-NEXT: srl s1, t1, a4 ; RV32I-NEXT: neg s6, a4 ; RV32I-NEXT: sll t4, t5, s6 ; RV32I-NEXT: bltu a4, t3, .LBB13_2 @@ -3982,7 +3954,7 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun 
; RV32I-NEXT: srl a7, t5, a4 ; RV32I-NEXT: j .LBB13_3 ; RV32I-NEXT: .LBB13_2: -; RV32I-NEXT: or a7, s0, t4 +; RV32I-NEXT: or a7, s1, t4 ; RV32I-NEXT: .LBB13_3: ; RV32I-NEXT: or t0, a6, a3 ; RV32I-NEXT: or a6, a1, a5 @@ -3996,11 +3968,11 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: sw a3, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu a4, t3, .LBB13_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: li ra, 0 +; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: srl a3, a6, a4 ; RV32I-NEXT: j .LBB13_8 ; RV32I-NEXT: .LBB13_7: -; RV32I-NEXT: srl ra, t5, a4 +; RV32I-NEXT: srl s0, t5, a4 ; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: .LBB13_8: ; RV32I-NEXT: li t6, 64 @@ -4037,29 +4009,29 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: mv t4, a3 ; RV32I-NEXT: .LBB13_18: ; RV32I-NEXT: neg s11, s9 -; RV32I-NEXT: sw s0, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s9, t3, .LBB13_20 ; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: srl s2, t5, s9 +; RV32I-NEXT: srl s1, t5, s9 ; RV32I-NEXT: j .LBB13_21 ; RV32I-NEXT: .LBB13_20: ; RV32I-NEXT: sll a3, t5, s11 -; RV32I-NEXT: or s2, s0, a3 +; RV32I-NEXT: or s1, s1, a3 ; RV32I-NEXT: .LBB13_21: -; RV32I-NEXT: lbu s1, 11(a0) +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu s2, 11(a0) ; RV32I-NEXT: lbu a3, 15(a0) ; RV32I-NEXT: mv s0, t1 ; RV32I-NEXT: beqz s9, .LBB13_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: mv s0, s2 +; RV32I-NEXT: mv s0, s1 ; RV32I-NEXT: .LBB13_23: -; RV32I-NEXT: lbu s4, 9(a0) -; RV32I-NEXT: lbu s2, 10(a0) -; RV32I-NEXT: lbu s5, 13(a0) -; RV32I-NEXT: lbu s8, 14(a0) -; RV32I-NEXT: slli s3, s1, 8 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: lbu s3, 9(a0) +; RV32I-NEXT: lbu s4, 10(a0) +; RV32I-NEXT: lbu s8, 13(a0) +; RV32I-NEXT: lbu ra, 14(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: sw ra, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s9, t3, .LBB13_25 ; RV32I-NEXT: # %bb.24: ; RV32I-NEXT: li s1, 0 @@ -4067,12 +4039,12 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: .LBB13_25: ; RV32I-NEXT: srl s1, t5, a4 ; RV32I-NEXT: .LBB13_26: -; RV32I-NEXT: or s2, s3, s2 -; RV32I-NEXT: lbu ra, 8(a0) -; RV32I-NEXT: lbu s3, 12(a0) -; RV32I-NEXT: slli s4, s4, 8 -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: or s8, a3, s8 +; RV32I-NEXT: slli s3, s3, 8 +; RV32I-NEXT: or s2, s2, s4 +; RV32I-NEXT: lbu s5, 8(a0) +; RV32I-NEXT: lbu s4, 12(a0) +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or ra, a3, ra ; RV32I-NEXT: bgeu a4, t6, .LBB13_28 ; RV32I-NEXT: # %bb.27: ; RV32I-NEXT: or s0, a7, t2 @@ -4080,10 +4052,10 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: .LBB13_28: ; RV32I-NEXT: lbu a3, 3(a0) ; RV32I-NEXT: lbu t2, 7(a0) -; RV32I-NEXT: or a5, s4, ra +; RV32I-NEXT: or a5, s3, s5 ; RV32I-NEXT: slli t4, s2, 16 -; RV32I-NEXT: or s2, s5, s3 -; RV32I-NEXT: slli s3, s8, 16 +; RV32I-NEXT: or s2, s8, s4 +; RV32I-NEXT: slli ra, ra, 16 ; RV32I-NEXT: mv s4, t0 ; RV32I-NEXT: mv a7, a6 ; RV32I-NEXT: beqz a4, .LBB13_30 @@ -4091,25 +4063,25 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: mv s4, s0 ; RV32I-NEXT: mv a7, s1 ; RV32I-NEXT: .LBB13_30: -; RV32I-NEXT: slli s5, a3, 8 -; RV32I-NEXT: lbu ra, 1(a0) +; RV32I-NEXT: slli s3, a3, 8 +; RV32I-NEXT: lbu s8, 1(a0) ; RV32I-NEXT: lbu a3, 2(a0) ; RV32I-NEXT: lbu s1, 5(a0) ; RV32I-NEXT: lbu s0, 6(a0) -; RV32I-NEXT: slli s8, t2, 8 +; RV32I-NEXT: slli s5, 
t2, 8 ; RV32I-NEXT: or t4, t4, a5 -; RV32I-NEXT: or t2, s3, s2 +; RV32I-NEXT: or t2, ra, s2 ; RV32I-NEXT: bltu a4, t6, .LBB13_32 ; RV32I-NEXT: # %bb.31: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: .LBB13_32: -; RV32I-NEXT: slli s3, ra, 8 -; RV32I-NEXT: or a5, s5, a3 -; RV32I-NEXT: lbu s5, 0(a0) +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or a5, s3, a3 +; RV32I-NEXT: lbu s3, 0(a0) ; RV32I-NEXT: lbu a0, 4(a0) ; RV32I-NEXT: slli s1, s1, 8 -; RV32I-NEXT: or a3, s8, s0 +; RV32I-NEXT: or a3, s5, s0 ; RV32I-NEXT: srl s2, t4, a4 ; RV32I-NEXT: sll ra, t2, s6 ; RV32I-NEXT: bltu a4, t3, .LBB13_34 @@ -4119,7 +4091,7 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: .LBB13_34: ; RV32I-NEXT: or s0, s2, ra ; RV32I-NEXT: .LBB13_35: -; RV32I-NEXT: or s3, s3, s5 +; RV32I-NEXT: or s3, s8, s3 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: slli a3, a3, 16 @@ -4561,88 +4533,88 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: or t0, t2, t1 ; RV64I-NEXT: lbu s8, 20(a0) ; RV64I-NEXT: lbu s9, 21(a0) ; RV64I-NEXT: lbu s10, 22(a0) ; RV64I-NEXT: lbu s11, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: slli s3, s3, 8 ; RV64I-NEXT: or a4, t4, t3 ; RV64I-NEXT: or a6, t6, t5 -; RV64I-NEXT: or t0, s1, s0 -; RV64I-NEXT: lbu t5, 24(a0) -; RV64I-NEXT: lbu t6, 25(a0) -; RV64I-NEXT: lbu s0, 26(a0) -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: or t1, s1, s0 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: lbu t3, 24(a0) +; RV64I-NEXT: lbu t4, 25(a0) +; RV64I-NEXT: lbu t5, 26(a0) +; RV64I-NEXT: lbu t6, 27(a0) ; RV64I-NEXT: slli s5, s5, 8 ; RV64I-NEXT: slli s7, s7, 8 -; RV64I-NEXT: or t4, s3, s2 -; RV64I-NEXT: or t2, s5, s4 -; RV64I-NEXT: or t3, s7, s6 -; RV64I-NEXT: lbu s2, 28(a0) -; RV64I-NEXT: lbu s3, 29(a0) -; RV64I-NEXT: lbu s4, 30(a0) -; RV64I-NEXT: lbu a0, 31(a0) ; RV64I-NEXT: slli s9, s9, 8 +; RV64I-NEXT: or s0, s5, s4 +; RV64I-NEXT: or s1, s7, s6 +; RV64I-NEXT: or s2, s9, s8 +; RV64I-NEXT: lbu s3, 28(a0) +; RV64I-NEXT: lbu s4, 29(a0) +; RV64I-NEXT: lbu s5, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) ; RV64I-NEXT: slli s11, s11, 8 +; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s5, s9, s8 +; RV64I-NEXT: slli s4, s4, 8 ; RV64I-NEXT: or s6, s11, s10 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: or s0, s1, s0 +; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or t4, t6, t5 +; RV64I-NEXT: or t5, s4, s3 ; RV64I-NEXT: lbu t6, 0(a1) -; RV64I-NEXT: lbu s1, 1(a1) -; RV64I-NEXT: lbu s7, 2(a1) -; RV64I-NEXT: lbu s8, 3(a1) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: lbu s3, 1(a1) +; RV64I-NEXT: lbu s4, 2(a1) +; RV64I-NEXT: lbu s7, 3(a1) ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: or s3, a0, s4 -; RV64I-NEXT: or t6, s1, t6 +; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: slli s7, s7, 8 +; RV64I-NEXT: or s5, a0, s5 +; RV64I-NEXT: or t6, s3, t6 +; RV64I-NEXT: or s3, s7, s4 ; RV64I-NEXT: lbu a0, 4(a1) -; RV64I-NEXT: lbu s1, 5(a1) -; RV64I-NEXT: lbu s4, 6(a1) +; RV64I-NEXT: lbu s4, 5(a1) +; RV64I-NEXT: lbu s7, 6(a1) ; RV64I-NEXT: lbu a1, 
7(a1) -; RV64I-NEXT: slli s8, s8, 8 -; RV64I-NEXT: or s7, s8, s7 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s1, s1, a0 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: or s4, s4, a0 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or s4, a1, s4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or a1, t1, a7 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: or a0, t4, t0 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: or t0, t3, t2 +; RV64I-NEXT: or s7, a1, s7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a1, t0, a7 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: or a0, t2, t1 +; RV64I-NEXT: slli s1, s1, 16 +; RV64I-NEXT: or s0, s1, s0 ; RV64I-NEXT: slli s6, s6, 16 -; RV64I-NEXT: or t1, s6, s5 -; RV64I-NEXT: slli s0, s0, 16 -; RV64I-NEXT: or t3, s0, t5 +; RV64I-NEXT: or t0, s6, s2 +; RV64I-NEXT: slli t4, t4, 16 +; RV64I-NEXT: or t1, t4, t3 +; RV64I-NEXT: slli s5, s5, 16 +; RV64I-NEXT: or t3, s5, t5 ; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: or t5, s3, s2 +; RV64I-NEXT: or t5, s3, t6 ; RV64I-NEXT: slli s7, s7, 16 -; RV64I-NEXT: or t6, s7, t6 -; RV64I-NEXT: slli s4, s4, 16 -; RV64I-NEXT: or s0, s4, s1 +; RV64I-NEXT: or t6, s7, s4 ; RV64I-NEXT: li a7, 64 ; RV64I-NEXT: slli t4, a5, 16 ; RV64I-NEXT: slli t2, a6, 16 -; RV64I-NEXT: slli t1, t1, 32 -; RV64I-NEXT: slli t5, t5, 32 -; RV64I-NEXT: slli s0, s0, 32 -; RV64I-NEXT: or a6, t1, t0 -; RV64I-NEXT: or t0, t5, t3 -; RV64I-NEXT: or a5, s0, t6 +; RV64I-NEXT: slli t0, t0, 32 +; RV64I-NEXT: slli t3, t3, 32 +; RV64I-NEXT: slli t6, t6, 32 +; RV64I-NEXT: or a6, t0, s0 +; RV64I-NEXT: or t0, t3, t1 +; RV64I-NEXT: or a5, t6, t5 ; RV64I-NEXT: slli a5, a5, 6 ; RV64I-NEXT: subw t1, a5, a7 ; RV64I-NEXT: negw t5, a5 @@ -4856,49 +4828,49 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: lbu t0, 21(a0) ; RV32I-NEXT: lbu t1, 22(a0) ; RV32I-NEXT: lbu t2, 23(a0) -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu t4, 25(a0) -; RV32I-NEXT: lbu t5, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) ; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 24(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: lbu t4, 26(a0) +; RV32I-NEXT: lbu t5, 27(a0) ; RV32I-NEXT: slli a6, a6, 8 ; RV32I-NEXT: slli t0, t0, 8 ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: slli t3, t3, 8 ; RV32I-NEXT: or a6, a6, a5 ; RV32I-NEXT: or a5, t0, a7 -; RV32I-NEXT: or a4, t2, t1 -; RV32I-NEXT: lbu a7, 28(a0) -; RV32I-NEXT: lbu t0, 29(a0) -; RV32I-NEXT: lbu t1, 30(a0) -; RV32I-NEXT: lbu t2, 31(a0) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t4, t4, t3 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: lbu t0, 0(a1) +; RV32I-NEXT: or a7, t2, t1 +; RV32I-NEXT: or a4, t3, a4 +; RV32I-NEXT: lbu t0, 28(a0) +; RV32I-NEXT: lbu t1, 29(a0) +; RV32I-NEXT: lbu t2, 30(a0) +; RV32I-NEXT: lbu t3, 31(a0) +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: or t1, t3, t2 +; RV32I-NEXT: lbu t2, 0(a1) ; RV32I-NEXT: lbu t3, 1(a1) -; RV32I-NEXT: lbu t6, 2(a1) +; RV32I-NEXT: lbu t5, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t1, t2, t1 ; RV32I-NEXT: slli t3, t3, 8 -; RV32I-NEXT: or t0, t3, t0 +; RV32I-NEXT: or t2, t3, t2 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or t2, a1, t6 +; RV32I-NEXT: or t5, a1, t5 ; RV32I-NEXT: li t3, 32 ; RV32I-NEXT: slli a6, a6, 16 -; RV32I-NEXT: slli a1, a4, 16 -; RV32I-NEXT: slli t5, t5, 
16 -; RV32I-NEXT: slli a4, t1, 16 -; RV32I-NEXT: slli t2, t2, 16 -; RV32I-NEXT: or t1, t5, t4 -; RV32I-NEXT: or t5, a4, a7 -; RV32I-NEXT: or a4, t2, t0 +; RV32I-NEXT: slli a1, a7, 16 +; RV32I-NEXT: slli t4, t4, 16 +; RV32I-NEXT: slli a7, t1, 16 +; RV32I-NEXT: slli t6, t5, 16 +; RV32I-NEXT: or t1, t4, a4 +; RV32I-NEXT: or t5, a7, t0 +; RV32I-NEXT: or a4, t6, t2 ; RV32I-NEXT: slli a4, a4, 6 -; RV32I-NEXT: srl s0, t1, a4 +; RV32I-NEXT: srl s1, t1, a4 ; RV32I-NEXT: neg s6, a4 ; RV32I-NEXT: sll t4, t5, s6 ; RV32I-NEXT: bltu a4, t3, .LBB14_2 @@ -4906,7 +4878,7 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: srl a7, t5, a4 ; RV32I-NEXT: j .LBB14_3 ; RV32I-NEXT: .LBB14_2: -; RV32I-NEXT: or a7, s0, t4 +; RV32I-NEXT: or a7, s1, t4 ; RV32I-NEXT: .LBB14_3: ; RV32I-NEXT: or t0, a6, a3 ; RV32I-NEXT: or a6, a1, a5 @@ -4920,11 +4892,11 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: sw a3, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu a4, t3, .LBB14_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: li ra, 0 +; RV32I-NEXT: li s0, 0 ; RV32I-NEXT: srl a3, a6, a4 ; RV32I-NEXT: j .LBB14_8 ; RV32I-NEXT: .LBB14_7: -; RV32I-NEXT: srl ra, t5, a4 +; RV32I-NEXT: srl s0, t5, a4 ; RV32I-NEXT: or a3, a3, a5 ; RV32I-NEXT: .LBB14_8: ; RV32I-NEXT: li t6, 64 @@ -4961,29 +4933,29 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: mv t4, a3 ; RV32I-NEXT: .LBB14_18: ; RV32I-NEXT: neg s11, s9 -; RV32I-NEXT: sw s0, 36(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s9, t3, .LBB14_20 ; RV32I-NEXT: # %bb.19: -; RV32I-NEXT: srl s2, t5, s9 +; RV32I-NEXT: srl s1, t5, s9 ; RV32I-NEXT: j .LBB14_21 ; RV32I-NEXT: .LBB14_20: ; RV32I-NEXT: sll a3, t5, s11 -; RV32I-NEXT: or s2, s0, a3 +; RV32I-NEXT: or s1, s1, a3 ; RV32I-NEXT: .LBB14_21: -; RV32I-NEXT: lbu s1, 11(a0) +; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: lbu s2, 11(a0) ; RV32I-NEXT: lbu a3, 15(a0) ; RV32I-NEXT: mv s0, t1 ; RV32I-NEXT: beqz s9, .LBB14_23 ; RV32I-NEXT: # %bb.22: -; RV32I-NEXT: mv s0, s2 +; RV32I-NEXT: mv s0, s1 ; RV32I-NEXT: .LBB14_23: -; RV32I-NEXT: lbu s4, 9(a0) -; RV32I-NEXT: lbu s2, 10(a0) -; RV32I-NEXT: lbu s5, 13(a0) -; RV32I-NEXT: lbu s8, 14(a0) -; RV32I-NEXT: slli s3, s1, 8 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: lbu s3, 9(a0) +; RV32I-NEXT: lbu s4, 10(a0) +; RV32I-NEXT: lbu s8, 13(a0) +; RV32I-NEXT: lbu ra, 14(a0) ; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: sw ra, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s9, t3, .LBB14_25 ; RV32I-NEXT: # %bb.24: ; RV32I-NEXT: li s1, 0 @@ -4991,12 +4963,12 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: .LBB14_25: ; RV32I-NEXT: srl s1, t5, a4 ; RV32I-NEXT: .LBB14_26: -; RV32I-NEXT: or s2, s3, s2 -; RV32I-NEXT: lbu ra, 8(a0) -; RV32I-NEXT: lbu s3, 12(a0) -; RV32I-NEXT: slli s4, s4, 8 -; RV32I-NEXT: slli s5, s5, 8 -; RV32I-NEXT: or s8, a3, s8 +; RV32I-NEXT: slli s3, s3, 8 +; RV32I-NEXT: or s2, s2, s4 +; RV32I-NEXT: lbu s5, 8(a0) +; RV32I-NEXT: lbu s4, 12(a0) +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or ra, a3, ra ; RV32I-NEXT: bgeu a4, t6, .LBB14_28 ; RV32I-NEXT: # %bb.27: ; RV32I-NEXT: or s0, a7, t2 @@ -5004,10 +4976,10 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: .LBB14_28: ; RV32I-NEXT: lbu a3, 3(a0) ; RV32I-NEXT: lbu t2, 7(a0) -; RV32I-NEXT: or a5, s4, ra +; RV32I-NEXT: or a5, s3, s5 ; RV32I-NEXT: slli 
t4, s2, 16 -; RV32I-NEXT: or s2, s5, s3 -; RV32I-NEXT: slli s3, s8, 16 +; RV32I-NEXT: or s2, s8, s4 +; RV32I-NEXT: slli ra, ra, 16 ; RV32I-NEXT: mv s4, t0 ; RV32I-NEXT: mv a7, a6 ; RV32I-NEXT: beqz a4, .LBB14_30 @@ -5015,25 +4987,25 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: mv s4, s0 ; RV32I-NEXT: mv a7, s1 ; RV32I-NEXT: .LBB14_30: -; RV32I-NEXT: slli s5, a3, 8 -; RV32I-NEXT: lbu ra, 1(a0) +; RV32I-NEXT: slli s3, a3, 8 +; RV32I-NEXT: lbu s8, 1(a0) ; RV32I-NEXT: lbu a3, 2(a0) ; RV32I-NEXT: lbu s1, 5(a0) ; RV32I-NEXT: lbu s0, 6(a0) -; RV32I-NEXT: slli s8, t2, 8 +; RV32I-NEXT: slli s5, t2, 8 ; RV32I-NEXT: or t4, t4, a5 -; RV32I-NEXT: or t2, s3, s2 +; RV32I-NEXT: or t2, ra, s2 ; RV32I-NEXT: bltu a4, t6, .LBB14_32 ; RV32I-NEXT: # %bb.31: ; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: .LBB14_32: -; RV32I-NEXT: slli s3, ra, 8 -; RV32I-NEXT: or a5, s5, a3 -; RV32I-NEXT: lbu s5, 0(a0) +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or a5, s3, a3 +; RV32I-NEXT: lbu s3, 0(a0) ; RV32I-NEXT: lbu a0, 4(a0) ; RV32I-NEXT: slli s1, s1, 8 -; RV32I-NEXT: or a3, s8, s0 +; RV32I-NEXT: or a3, s5, s0 ; RV32I-NEXT: srl s2, t4, a4 ; RV32I-NEXT: sll ra, t2, s6 ; RV32I-NEXT: bltu a4, t3, .LBB14_34 @@ -5043,7 +5015,7 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: .LBB14_34: ; RV32I-NEXT: or s0, s2, ra ; RV32I-NEXT: .LBB14_35: -; RV32I-NEXT: or s3, s3, s5 +; RV32I-NEXT: or s3, s8, s3 ; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: slli a3, a3, 16 @@ -5784,54 +5756,54 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli t3, t3, 8 ; RV32I-NEXT: or a5, a7, a5 ; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: lbu t0, 0(a1) -; RV32I-NEXT: lbu t1, 1(a1) -; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: or t0, t3, t2 +; RV32I-NEXT: lbu t1, 0(a1) +; RV32I-NEXT: lbu t2, 1(a1) ; RV32I-NEXT: lbu t3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 ; RV32I-NEXT: li s9, 64 ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a1, a1, t3 ; RV32I-NEXT: li t4, 32 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: or t3, a5, a4 -; RV32I-NEXT: or a5, t2, a7 -; RV32I-NEXT: or a4, a1, t0 +; RV32I-NEXT: or a5, t0, a7 +; RV32I-NEXT: or a4, a1, t1 ; RV32I-NEXT: slli a4, a4, 3 -; RV32I-NEXT: neg s10, a4 -; RV32I-NEXT: srl t5, t3, s10 -; RV32I-NEXT: sll s5, a5, a4 +; RV32I-NEXT: neg s5, a4 +; RV32I-NEXT: srl t5, t3, s5 +; RV32I-NEXT: sll s10, a5, a4 ; RV32I-NEXT: bltu a4, t4, .LBB15_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: sll a7, t3, a4 +; RV32I-NEXT: sll t0, t3, a4 ; RV32I-NEXT: j .LBB15_3 ; RV32I-NEXT: .LBB15_2: ; RV32I-NEXT: sll s8, t3, a4 -; RV32I-NEXT: or a7, t5, s5 +; RV32I-NEXT: or t0, t5, s10 ; RV32I-NEXT: .LBB15_3: +; RV32I-NEXT: slli a6, a6, 8 ; RV32I-NEXT: lbu t2, 9(a0) -; RV32I-NEXT: lbu a1, 10(a0) +; RV32I-NEXT: lbu a7, 10(a0) ; RV32I-NEXT: lbu t1, 13(a0) -; RV32I-NEXT: lbu t0, 14(a0) -; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: lbu a1, 14(a0) ; RV32I-NEXT: slli t6, a3, 8 ; RV32I-NEXT: sub s6, s9, a4 ; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: beqz a4, .LBB15_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: mv a3, t0 ; RV32I-NEXT: .LBB15_5: -; RV32I-NEXT: slli a7, t2, 8 -; RV32I-NEXT: or a6, a6, a1 +; RV32I-NEXT: slli 
t0, t2, 8 +; RV32I-NEXT: or a6, a6, a7 ; RV32I-NEXT: lbu t2, 8(a0) -; RV32I-NEXT: lbu a1, 12(a0) +; RV32I-NEXT: lbu a7, 12(a0) ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t0, t6, t0 +; RV32I-NEXT: or a1, t6, a1 ; RV32I-NEXT: neg t6, s6 ; RV32I-NEXT: sw t6, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s6, t4, .LBB15_7 @@ -5842,25 +5814,25 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll t6, a5, t6 ; RV32I-NEXT: or t6, t5, t6 ; RV32I-NEXT: .LBB15_8: -; RV32I-NEXT: or a7, a7, t2 +; RV32I-NEXT: or t0, t0, t2 ; RV32I-NEXT: slli t2, a6, 16 -; RV32I-NEXT: or a1, t1, a1 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: or a7, t1, a7 +; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: mv a6, t3 ; RV32I-NEXT: beqz s6, .LBB15_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a6, t6 ; RV32I-NEXT: .LBB15_10: -; RV32I-NEXT: or t1, t2, a7 -; RV32I-NEXT: or t2, t0, a1 +; RV32I-NEXT: or t1, t2, t0 +; RV32I-NEXT: or t2, a1, a7 ; RV32I-NEXT: bltu s6, t4, .LBB15_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li t0, 0 ; RV32I-NEXT: j .LBB15_13 ; RV32I-NEXT: .LBB15_12: -; RV32I-NEXT: srl a7, a5, s10 +; RV32I-NEXT: srl t0, a5, s5 ; RV32I-NEXT: .LBB15_13: -; RV32I-NEXT: srl s0, t1, s10 +; RV32I-NEXT: srl s0, t1, s5 ; RV32I-NEXT: sll a1, t2, a4 ; RV32I-NEXT: sw a1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu a4, t4, .LBB15_15 @@ -5890,7 +5862,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB15_20: ; RV32I-NEXT: sll s2, t3, a4 ; RV32I-NEXT: srl a1, t3, a1 -; RV32I-NEXT: or a1, a1, s5 +; RV32I-NEXT: or a1, a1, s10 ; RV32I-NEXT: mv s4, a5 ; RV32I-NEXT: beqz s7, .LBB15_22 ; RV32I-NEXT: .LBB15_21: @@ -5905,7 +5877,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB15_24: ; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: or s2, a6, s1 -; RV32I-NEXT: or s4, a7, s3 +; RV32I-NEXT: or s4, t0, s3 ; RV32I-NEXT: .LBB15_25: ; RV32I-NEXT: sub ra, a1, a4 ; RV32I-NEXT: mv a7, t1 @@ -5920,15 +5892,15 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: bltu ra, t4, .LBB15_29 ; RV32I-NEXT: # %bb.28: ; RV32I-NEXT: srl a1, t2, ra -; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, t1 ; RV32I-NEXT: bnez ra, .LBB15_30 ; RV32I-NEXT: j .LBB15_31 ; RV32I-NEXT: .LBB15_29: ; RV32I-NEXT: or a1, s0, s2 -; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, t1 ; RV32I-NEXT: beqz ra, .LBB15_31 ; RV32I-NEXT: .LBB15_30: -; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, a1 ; RV32I-NEXT: .LBB15_31: ; RV32I-NEXT: bltu ra, t4, .LBB15_33 ; RV32I-NEXT: # %bb.32: @@ -5938,7 +5910,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: bnez ra, .LBB15_34 ; RV32I-NEXT: j .LBB15_35 ; RV32I-NEXT: .LBB15_33: -; RV32I-NEXT: srl a1, t2, s10 +; RV32I-NEXT: srl a1, t2, s5 ; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: sll a1, a5, s1 ; RV32I-NEXT: or a1, t5, a1 @@ -5959,7 +5931,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a1, a1, s2 ; RV32I-NEXT: j .LBB15_40 ; RV32I-NEXT: .LBB15_38: -; RV32I-NEXT: srl a1, a5, s10 +; RV32I-NEXT: srl a1, a5, s5 ; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s3, t4, .LBB15_37 ; RV32I-NEXT: .LBB15_39: @@ -5972,35 +5944,33 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: # %bb.41: ; 
RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: .LBB15_42: -; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s5, a7 +; RV32I-NEXT: sw t0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: bltu s4, t4, .LBB15_44 ; RV32I-NEXT: # %bb.43: -; RV32I-NEXT: srl t0, t2, s4 +; RV32I-NEXT: srl a7, t2, s4 ; RV32I-NEXT: j .LBB15_45 ; RV32I-NEXT: .LBB15_44: ; RV32I-NEXT: srl a1, t1, ra -; RV32I-NEXT: neg t0, s4 -; RV32I-NEXT: sll t0, t2, t0 -; RV32I-NEXT: or t0, a1, t0 +; RV32I-NEXT: neg a7, s4 +; RV32I-NEXT: sll a7, t2, a7 +; RV32I-NEXT: or a7, a1, a7 ; RV32I-NEXT: .LBB15_45: -; RV32I-NEXT: mv s0, s10 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: lbu s8, 19(a0) +; RV32I-NEXT: sw s10, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: li s0, 64 +; RV32I-NEXT: lbu t6, 19(a0) ; RV32I-NEXT: lbu a1, 23(a0) ; RV32I-NEXT: mv s3, t1 ; RV32I-NEXT: beqz s4, .LBB15_47 ; RV32I-NEXT: # %bb.46: -; RV32I-NEXT: mv s3, t0 +; RV32I-NEXT: mv s3, a7 ; RV32I-NEXT: .LBB15_47: -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: lbu s10, 17(a0) -; RV32I-NEXT: lbu t0, 18(a0) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: lbu s11, 17(a0) +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu s9, 21(a0) -; RV32I-NEXT: lbu t6, 22(a0) -; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: lbu s8, 22(a0) ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: li a3, 64 ; RV32I-NEXT: bltu s4, t4, .LBB15_49 ; RV32I-NEXT: # %bb.48: ; RV32I-NEXT: li s4, 0 @@ -6008,45 +5978,41 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB15_49: ; RV32I-NEXT: srl s4, t2, ra ; RV32I-NEXT: .LBB15_50: -; RV32I-NEXT: or s11, s8, t0 -; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu s8, 20(a0) -; RV32I-NEXT: slli s10, s10, 8 +; RV32I-NEXT: slli s11, s11, 8 +; RV32I-NEXT: or s10, t6, a7 +; RV32I-NEXT: lbu a7, 16(a0) +; RV32I-NEXT: lbu t6, 20(a0) ; RV32I-NEXT: slli s9, s9, 8 -; RV32I-NEXT: or t6, a1, t6 -; RV32I-NEXT: bgeu ra, a3, .LBB15_52 +; RV32I-NEXT: or s8, a1, s8 +; RV32I-NEXT: bgeu ra, s0, .LBB15_52 ; RV32I-NEXT: # %bb.51: ; RV32I-NEXT: or s3, t5, s1 ; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload ; RV32I-NEXT: or s4, a1, s2 ; RV32I-NEXT: .LBB15_52: -; RV32I-NEXT: or a1, s10, t0 -; RV32I-NEXT: slli s11, s11, 16 -; RV32I-NEXT: or t0, s9, s8 -; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: or a1, s11, a7 +; RV32I-NEXT: slli s10, s10, 16 +; RV32I-NEXT: or a7, s9, t6 +; RV32I-NEXT: slli s8, s8, 16 ; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: mv s1, a5 -; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: mv t6, a5 ; RV32I-NEXT: beqz ra, .LBB15_54 ; RV32I-NEXT: # %bb.53: ; RV32I-NEXT: mv t5, s3 -; RV32I-NEXT: mv s1, s4 +; RV32I-NEXT: mv t6, s4 ; RV32I-NEXT: .LBB15_54: -; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: or s2, s11, a1 -; RV32I-NEXT: or s1, t6, t0 +; RV32I-NEXT: or s2, s10, a1 +; RV32I-NEXT: or s1, s8, a7 ; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: mv a6, a7 -; RV32I-NEXT: mv a7, s0 ; RV32I-NEXT: bltu ra, a1, .LBB15_56 ; RV32I-NEXT: # %bb.55: ; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: .LBB15_56: -; RV32I-NEXT: srl s3, s2, a7 -; RV32I-NEXT: sll ra, s1, a4 -; RV32I-NEXT: mv a7, s5 +; RV32I-NEXT: srl s3, s2, s5 +; RV32I-NEXT: sll s0, s1, a4 ; RV32I-NEXT: sw t5, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t6, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu a4, t4, .LBB15_58 ; RV32I-NEXT: # %bb.57: ; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill @@ -6055,54 +6021,54 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: 
.LBB15_58: ; RV32I-NEXT: sll a1, s2, a4 ; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: or a1, s3, ra +; RV32I-NEXT: or a1, s3, s0 ; RV32I-NEXT: .LBB15_59: -; RV32I-NEXT: lbu s9, 27(a0) +; RV32I-NEXT: lbu s11, 27(a0) ; RV32I-NEXT: lbu t6, 31(a0) ; RV32I-NEXT: mv t5, s1 ; RV32I-NEXT: beqz a4, .LBB15_61 ; RV32I-NEXT: # %bb.60: ; RV32I-NEXT: mv t5, a1 ; RV32I-NEXT: .LBB15_61: -; RV32I-NEXT: lbu s8, 25(a0) -; RV32I-NEXT: lbu s4, 26(a0) -; RV32I-NEXT: lbu s11, 29(a0) -; RV32I-NEXT: lbu s10, 30(a0) -; RV32I-NEXT: slli s9, s9, 8 +; RV32I-NEXT: slli s11, s11, 8 +; RV32I-NEXT: lbu s9, 25(a0) +; RV32I-NEXT: lbu s8, 26(a0) +; RV32I-NEXT: lbu s10, 29(a0) +; RV32I-NEXT: lbu s4, 30(a0) ; RV32I-NEXT: slli t6, t6, 8 ; RV32I-NEXT: bltu s6, t4, .LBB15_63 ; RV32I-NEXT: # %bb.62: -; RV32I-NEXT: srl t0, s1, s6 +; RV32I-NEXT: srl a7, s1, s6 ; RV32I-NEXT: j .LBB15_64 ; RV32I-NEXT: .LBB15_63: ; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: sll a1, s1, a1 -; RV32I-NEXT: or t0, s3, a1 +; RV32I-NEXT: or a7, s3, a1 ; RV32I-NEXT: .LBB15_64: -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: lbu s3, 24(a0) -; RV32I-NEXT: lbu a1, 28(a0) -; RV32I-NEXT: or s4, s9, s4 -; RV32I-NEXT: slli s11, s11, 8 -; RV32I-NEXT: or t6, t6, s10 +; RV32I-NEXT: slli s3, s9, 8 +; RV32I-NEXT: or a1, s11, s8 +; RV32I-NEXT: lbu s11, 24(a0) +; RV32I-NEXT: lbu s8, 28(a0) +; RV32I-NEXT: slli s10, s10, 8 +; RV32I-NEXT: or t6, t6, s4 ; RV32I-NEXT: mv s9, s2 ; RV32I-NEXT: beqz s6, .LBB15_66 ; RV32I-NEXT: # %bb.65: -; RV32I-NEXT: mv s9, t0 +; RV32I-NEXT: mv s9, a7 ; RV32I-NEXT: .LBB15_66: -; RV32I-NEXT: or a0, s8, s3 -; RV32I-NEXT: slli t0, s4, 16 -; RV32I-NEXT: or a1, s11, a1 +; RV32I-NEXT: or a0, s3, s11 +; RV32I-NEXT: slli a7, a1, 16 +; RV32I-NEXT: or a1, s10, s8 ; RV32I-NEXT: slli t6, t6, 16 ; RV32I-NEXT: bltu s6, t4, .LBB15_68 ; RV32I-NEXT: # %bb.67: ; RV32I-NEXT: li s4, 0 ; RV32I-NEXT: j .LBB15_69 ; RV32I-NEXT: .LBB15_68: -; RV32I-NEXT: srl s4, s1, s0 +; RV32I-NEXT: srl s4, s1, s5 ; RV32I-NEXT: .LBB15_69: ; RV32I-NEXT: li s11, 64 -; RV32I-NEXT: or s6, t0, a0 +; RV32I-NEXT: or s6, a7, a0 ; RV32I-NEXT: or a0, t6, a1 ; RV32I-NEXT: bltu a4, t4, .LBB15_71 ; RV32I-NEXT: # %bb.70: @@ -6113,9 +6079,9 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: j .LBB15_73 ; RV32I-NEXT: .LBB15_71: ; RV32I-NEXT: sll s3, s6, a4 -; RV32I-NEXT: srl a1, s6, s0 -; RV32I-NEXT: sll t0, a0, a4 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: srl a1, s6, s5 +; RV32I-NEXT: sll a7, a0, a4 +; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: mv s10, a0 ; RV32I-NEXT: beqz a4, .LBB15_73 ; RV32I-NEXT: .LBB15_72: @@ -6132,7 +6098,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll s5, s2, a4 ; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: srl a1, s2, a1 -; RV32I-NEXT: or a1, a1, ra +; RV32I-NEXT: or a1, a1, s0 ; RV32I-NEXT: mv s0, s1 ; RV32I-NEXT: beqz s7, .LBB15_77 ; RV32I-NEXT: .LBB15_76: @@ -6196,8 +6162,8 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB15_93: ; RV32I-NEXT: sll s10, t1, a4 ; RV32I-NEXT: srl a1, t1, s3 -; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: lw a7, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: j .LBB15_96 ; RV32I-NEXT: .LBB15_94: ; RV32I-NEXT: srl s4, a5, s3 @@ -6223,8 +6189,8 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll t4, t3, s9 ; RV32I-NEXT: neg a1, s11 
; RV32I-NEXT: srl a1, t3, a1 -; RV32I-NEXT: sll t0, a5, s9 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: sll a7, a5, s9 +; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: beqz s11, .LBB15_102 ; RV32I-NEXT: .LBB15_101: ; RV32I-NEXT: mv a5, a1 @@ -6249,7 +6215,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: # %bb.107: ; RV32I-NEXT: li ra, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li t0, 0 ; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: bnez a4, .LBB15_109 ; RV32I-NEXT: j .LBB15_110 @@ -6276,8 +6242,8 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: srli t1, ra, 24 ; RV32I-NEXT: srli a5, a3, 16 ; RV32I-NEXT: srli t4, a3, 24 -; RV32I-NEXT: srli t0, a7, 16 -; RV32I-NEXT: srli s0, a7, 24 +; RV32I-NEXT: srli a7, t0, 16 +; RV32I-NEXT: srli s0, t0, 24 ; RV32I-NEXT: srli t3, a6, 16 ; RV32I-NEXT: srli s3, a6, 24 ; RV32I-NEXT: srli t6, s2, 16 @@ -6296,7 +6262,7 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sb s10, 1(a2) ; RV32I-NEXT: sb a4, 2(a2) ; RV32I-NEXT: sb t1, 3(a2) -; RV32I-NEXT: and a4, a7, t2 +; RV32I-NEXT: and a4, t0, t2 ; RV32I-NEXT: srli t1, s11, 8 ; RV32I-NEXT: sb a3, 4(a2) ; RV32I-NEXT: sb t1, 5(a2) @@ -6304,9 +6270,9 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sb t4, 7(a2) ; RV32I-NEXT: and a3, a6, t2 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a7, 8(a2) +; RV32I-NEXT: sb t0, 8(a2) ; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb t0, 10(a2) +; RV32I-NEXT: sb a7, 10(a2) ; RV32I-NEXT: sb s0, 11(a2) ; RV32I-NEXT: and a4, s2, t2 ; RV32I-NEXT: srli a3, a3, 8 @@ -6698,54 +6664,54 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: slli t3, t3, 8 ; RV32I-NEXT: or a5, a7, a5 ; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: lbu t0, 0(a1) -; RV32I-NEXT: lbu t1, 1(a1) -; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: or t0, t3, t2 +; RV32I-NEXT: lbu t1, 0(a1) +; RV32I-NEXT: lbu t2, 1(a1) ; RV32I-NEXT: lbu t3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 ; RV32I-NEXT: li s9, 64 ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a1, a1, t3 ; RV32I-NEXT: li t4, 32 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: or t3, a5, a4 -; RV32I-NEXT: or a5, t2, a7 -; RV32I-NEXT: or a4, a1, t0 +; RV32I-NEXT: or a5, t0, a7 +; RV32I-NEXT: or a4, a1, t1 ; RV32I-NEXT: slli a4, a4, 5 -; RV32I-NEXT: neg s10, a4 -; RV32I-NEXT: srl t5, t3, s10 -; RV32I-NEXT: sll s5, a5, a4 +; RV32I-NEXT: neg s5, a4 +; RV32I-NEXT: srl t5, t3, s5 +; RV32I-NEXT: sll s10, a5, a4 ; RV32I-NEXT: bltu a4, t4, .LBB16_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: sll a7, t3, a4 +; RV32I-NEXT: sll t0, t3, a4 ; RV32I-NEXT: j .LBB16_3 ; RV32I-NEXT: .LBB16_2: ; RV32I-NEXT: sll s8, t3, a4 -; RV32I-NEXT: or a7, t5, s5 +; RV32I-NEXT: or t0, t5, s10 ; RV32I-NEXT: .LBB16_3: +; RV32I-NEXT: slli a6, a6, 8 ; RV32I-NEXT: lbu t2, 9(a0) -; RV32I-NEXT: lbu a1, 10(a0) +; RV32I-NEXT: lbu a7, 10(a0) ; RV32I-NEXT: lbu t1, 13(a0) -; RV32I-NEXT: lbu t0, 14(a0) -; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: lbu a1, 14(a0) ; RV32I-NEXT: slli t6, a3, 8 ; RV32I-NEXT: sub s6, s9, a4 ; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: beqz a4, .LBB16_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: mv a3, t0 ; RV32I-NEXT: .LBB16_5: -; 
RV32I-NEXT: slli a7, t2, 8 -; RV32I-NEXT: or a6, a6, a1 +; RV32I-NEXT: slli t0, t2, 8 +; RV32I-NEXT: or a6, a6, a7 ; RV32I-NEXT: lbu t2, 8(a0) -; RV32I-NEXT: lbu a1, 12(a0) +; RV32I-NEXT: lbu a7, 12(a0) ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t0, t6, t0 +; RV32I-NEXT: or a1, t6, a1 ; RV32I-NEXT: neg t6, s6 ; RV32I-NEXT: sw t6, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s6, t4, .LBB16_7 @@ -6756,25 +6722,25 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: sll t6, a5, t6 ; RV32I-NEXT: or t6, t5, t6 ; RV32I-NEXT: .LBB16_8: -; RV32I-NEXT: or a7, a7, t2 +; RV32I-NEXT: or t0, t0, t2 ; RV32I-NEXT: slli t2, a6, 16 -; RV32I-NEXT: or a1, t1, a1 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: or a7, t1, a7 +; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: mv a6, t3 ; RV32I-NEXT: beqz s6, .LBB16_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a6, t6 ; RV32I-NEXT: .LBB16_10: -; RV32I-NEXT: or t1, t2, a7 -; RV32I-NEXT: or t2, t0, a1 +; RV32I-NEXT: or t1, t2, t0 +; RV32I-NEXT: or t2, a1, a7 ; RV32I-NEXT: bltu s6, t4, .LBB16_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li t0, 0 ; RV32I-NEXT: j .LBB16_13 ; RV32I-NEXT: .LBB16_12: -; RV32I-NEXT: srl a7, a5, s10 +; RV32I-NEXT: srl t0, a5, s5 ; RV32I-NEXT: .LBB16_13: -; RV32I-NEXT: srl s0, t1, s10 +; RV32I-NEXT: srl s0, t1, s5 ; RV32I-NEXT: sll a1, t2, a4 ; RV32I-NEXT: sw a1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu a4, t4, .LBB16_15 @@ -6804,7 +6770,7 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: .LBB16_20: ; RV32I-NEXT: sll s2, t3, a4 ; RV32I-NEXT: srl a1, t3, a1 -; RV32I-NEXT: or a1, a1, s5 +; RV32I-NEXT: or a1, a1, s10 ; RV32I-NEXT: mv s4, a5 ; RV32I-NEXT: beqz s7, .LBB16_22 ; RV32I-NEXT: .LBB16_21: @@ -6819,7 +6785,7 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: .LBB16_24: ; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: or s2, a6, s1 -; RV32I-NEXT: or s4, a7, s3 +; RV32I-NEXT: or s4, t0, s3 ; RV32I-NEXT: .LBB16_25: ; RV32I-NEXT: sub ra, a1, a4 ; RV32I-NEXT: mv a7, t1 @@ -6834,15 +6800,15 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: bltu ra, t4, .LBB16_29 ; RV32I-NEXT: # %bb.28: ; RV32I-NEXT: srl a1, t2, ra -; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, t1 ; RV32I-NEXT: bnez ra, .LBB16_30 ; RV32I-NEXT: j .LBB16_31 ; RV32I-NEXT: .LBB16_29: ; RV32I-NEXT: or a1, s0, s2 -; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, t1 ; RV32I-NEXT: beqz ra, .LBB16_31 ; RV32I-NEXT: .LBB16_30: -; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, a1 ; RV32I-NEXT: .LBB16_31: ; RV32I-NEXT: bltu ra, t4, .LBB16_33 ; RV32I-NEXT: # %bb.32: @@ -6852,7 +6818,7 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: bnez ra, .LBB16_34 ; RV32I-NEXT: j .LBB16_35 ; RV32I-NEXT: .LBB16_33: -; RV32I-NEXT: srl a1, t2, s10 +; RV32I-NEXT: srl a1, t2, s5 ; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: sll a1, a5, s1 ; RV32I-NEXT: or a1, t5, a1 @@ -6873,7 +6839,7 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: or a1, a1, s2 ; RV32I-NEXT: j .LBB16_40 ; RV32I-NEXT: .LBB16_38: -; RV32I-NEXT: srl a1, a5, s10 +; RV32I-NEXT: srl a1, a5, s5 ; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s3, t4, .LBB16_37 ; RV32I-NEXT: .LBB16_39: @@ -6886,35 +6852,33 @@ define 
void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: # %bb.41: ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: .LBB16_42: -; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s5, a7 +; RV32I-NEXT: sw t0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: bltu s4, t4, .LBB16_44 ; RV32I-NEXT: # %bb.43: -; RV32I-NEXT: srl t0, t2, s4 +; RV32I-NEXT: srl a7, t2, s4 ; RV32I-NEXT: j .LBB16_45 ; RV32I-NEXT: .LBB16_44: ; RV32I-NEXT: srl a1, t1, ra -; RV32I-NEXT: neg t0, s4 -; RV32I-NEXT: sll t0, t2, t0 -; RV32I-NEXT: or t0, a1, t0 +; RV32I-NEXT: neg a7, s4 +; RV32I-NEXT: sll a7, t2, a7 +; RV32I-NEXT: or a7, a1, a7 ; RV32I-NEXT: .LBB16_45: -; RV32I-NEXT: mv s0, s10 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: lbu s8, 19(a0) +; RV32I-NEXT: sw s10, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: li s0, 64 +; RV32I-NEXT: lbu t6, 19(a0) ; RV32I-NEXT: lbu a1, 23(a0) ; RV32I-NEXT: mv s3, t1 ; RV32I-NEXT: beqz s4, .LBB16_47 ; RV32I-NEXT: # %bb.46: -; RV32I-NEXT: mv s3, t0 +; RV32I-NEXT: mv s3, a7 ; RV32I-NEXT: .LBB16_47: -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: lbu s10, 17(a0) -; RV32I-NEXT: lbu t0, 18(a0) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: lbu s11, 17(a0) +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu s9, 21(a0) -; RV32I-NEXT: lbu t6, 22(a0) -; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: lbu s8, 22(a0) ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: li a3, 64 ; RV32I-NEXT: bltu s4, t4, .LBB16_49 ; RV32I-NEXT: # %bb.48: ; RV32I-NEXT: li s4, 0 @@ -6922,45 +6886,41 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: .LBB16_49: ; RV32I-NEXT: srl s4, t2, ra ; RV32I-NEXT: .LBB16_50: -; RV32I-NEXT: or s11, s8, t0 -; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu s8, 20(a0) -; RV32I-NEXT: slli s10, s10, 8 +; RV32I-NEXT: slli s11, s11, 8 +; RV32I-NEXT: or s10, t6, a7 +; RV32I-NEXT: lbu a7, 16(a0) +; RV32I-NEXT: lbu t6, 20(a0) ; RV32I-NEXT: slli s9, s9, 8 -; RV32I-NEXT: or t6, a1, t6 -; RV32I-NEXT: bgeu ra, a3, .LBB16_52 +; RV32I-NEXT: or s8, a1, s8 +; RV32I-NEXT: bgeu ra, s0, .LBB16_52 ; RV32I-NEXT: # %bb.51: ; RV32I-NEXT: or s3, t5, s1 ; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload ; RV32I-NEXT: or s4, a1, s2 ; RV32I-NEXT: .LBB16_52: -; RV32I-NEXT: or a1, s10, t0 -; RV32I-NEXT: slli s11, s11, 16 -; RV32I-NEXT: or t0, s9, s8 -; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: or a1, s11, a7 +; RV32I-NEXT: slli s10, s10, 16 +; RV32I-NEXT: or a7, s9, t6 +; RV32I-NEXT: slli s8, s8, 16 ; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: mv s1, a5 -; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: mv t6, a5 ; RV32I-NEXT: beqz ra, .LBB16_54 ; RV32I-NEXT: # %bb.53: ; RV32I-NEXT: mv t5, s3 -; RV32I-NEXT: mv s1, s4 +; RV32I-NEXT: mv t6, s4 ; RV32I-NEXT: .LBB16_54: -; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: or s2, s11, a1 -; RV32I-NEXT: or s1, t6, t0 +; RV32I-NEXT: or s2, s10, a1 +; RV32I-NEXT: or s1, s8, a7 ; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: mv a6, a7 -; RV32I-NEXT: mv a7, s0 ; RV32I-NEXT: bltu ra, a1, .LBB16_56 ; RV32I-NEXT: # %bb.55: ; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: .LBB16_56: -; RV32I-NEXT: srl s3, s2, a7 -; RV32I-NEXT: sll ra, s1, a4 -; RV32I-NEXT: mv a7, s5 +; RV32I-NEXT: srl s3, s2, s5 +; RV32I-NEXT: sll s0, s1, a4 ; RV32I-NEXT: sw t5, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t6, 4(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu a4, t4, .LBB16_58 ; RV32I-NEXT: # %bb.57: ; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill @@ -6969,54 
+6929,54 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: .LBB16_58: ; RV32I-NEXT: sll a1, s2, a4 ; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: or a1, s3, ra +; RV32I-NEXT: or a1, s3, s0 ; RV32I-NEXT: .LBB16_59: -; RV32I-NEXT: lbu s9, 27(a0) +; RV32I-NEXT: lbu s11, 27(a0) ; RV32I-NEXT: lbu t6, 31(a0) ; RV32I-NEXT: mv t5, s1 ; RV32I-NEXT: beqz a4, .LBB16_61 ; RV32I-NEXT: # %bb.60: ; RV32I-NEXT: mv t5, a1 ; RV32I-NEXT: .LBB16_61: -; RV32I-NEXT: lbu s8, 25(a0) -; RV32I-NEXT: lbu s4, 26(a0) -; RV32I-NEXT: lbu s11, 29(a0) -; RV32I-NEXT: lbu s10, 30(a0) -; RV32I-NEXT: slli s9, s9, 8 +; RV32I-NEXT: slli s11, s11, 8 +; RV32I-NEXT: lbu s9, 25(a0) +; RV32I-NEXT: lbu s8, 26(a0) +; RV32I-NEXT: lbu s10, 29(a0) +; RV32I-NEXT: lbu s4, 30(a0) ; RV32I-NEXT: slli t6, t6, 8 ; RV32I-NEXT: bltu s6, t4, .LBB16_63 ; RV32I-NEXT: # %bb.62: -; RV32I-NEXT: srl t0, s1, s6 +; RV32I-NEXT: srl a7, s1, s6 ; RV32I-NEXT: j .LBB16_64 ; RV32I-NEXT: .LBB16_63: ; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: sll a1, s1, a1 -; RV32I-NEXT: or t0, s3, a1 +; RV32I-NEXT: or a7, s3, a1 ; RV32I-NEXT: .LBB16_64: -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: lbu s3, 24(a0) -; RV32I-NEXT: lbu a1, 28(a0) -; RV32I-NEXT: or s4, s9, s4 -; RV32I-NEXT: slli s11, s11, 8 -; RV32I-NEXT: or t6, t6, s10 +; RV32I-NEXT: slli s3, s9, 8 +; RV32I-NEXT: or a1, s11, s8 +; RV32I-NEXT: lbu s11, 24(a0) +; RV32I-NEXT: lbu s8, 28(a0) +; RV32I-NEXT: slli s10, s10, 8 +; RV32I-NEXT: or t6, t6, s4 ; RV32I-NEXT: mv s9, s2 ; RV32I-NEXT: beqz s6, .LBB16_66 ; RV32I-NEXT: # %bb.65: -; RV32I-NEXT: mv s9, t0 +; RV32I-NEXT: mv s9, a7 ; RV32I-NEXT: .LBB16_66: -; RV32I-NEXT: or a0, s8, s3 -; RV32I-NEXT: slli t0, s4, 16 -; RV32I-NEXT: or a1, s11, a1 +; RV32I-NEXT: or a0, s3, s11 +; RV32I-NEXT: slli a7, a1, 16 +; RV32I-NEXT: or a1, s10, s8 ; RV32I-NEXT: slli t6, t6, 16 ; RV32I-NEXT: bltu s6, t4, .LBB16_68 ; RV32I-NEXT: # %bb.67: ; RV32I-NEXT: li s4, 0 ; RV32I-NEXT: j .LBB16_69 ; RV32I-NEXT: .LBB16_68: -; RV32I-NEXT: srl s4, s1, s0 +; RV32I-NEXT: srl s4, s1, s5 ; RV32I-NEXT: .LBB16_69: ; RV32I-NEXT: li s11, 64 -; RV32I-NEXT: or s6, t0, a0 +; RV32I-NEXT: or s6, a7, a0 ; RV32I-NEXT: or a0, t6, a1 ; RV32I-NEXT: bltu a4, t4, .LBB16_71 ; RV32I-NEXT: # %bb.70: @@ -7027,9 +6987,9 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: j .LBB16_73 ; RV32I-NEXT: .LBB16_71: ; RV32I-NEXT: sll s3, s6, a4 -; RV32I-NEXT: srl a1, s6, s0 -; RV32I-NEXT: sll t0, a0, a4 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: srl a1, s6, s5 +; RV32I-NEXT: sll a7, a0, a4 +; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: mv s10, a0 ; RV32I-NEXT: beqz a4, .LBB16_73 ; RV32I-NEXT: .LBB16_72: @@ -7046,7 +7006,7 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: sll s5, s2, a4 ; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: srl a1, s2, a1 -; RV32I-NEXT: or a1, a1, ra +; RV32I-NEXT: or a1, a1, s0 ; RV32I-NEXT: mv s0, s1 ; RV32I-NEXT: beqz s7, .LBB16_77 ; RV32I-NEXT: .LBB16_76: @@ -7110,8 +7070,8 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: .LBB16_93: ; RV32I-NEXT: sll s10, t1, a4 ; RV32I-NEXT: srl a1, t1, s3 -; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: lw a7, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: j .LBB16_96 ; RV32I-NEXT: .LBB16_94: ; RV32I-NEXT: srl s4, a5, s3 @@ -7137,8 +7097,8 @@ define void 
@shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: sll t4, t3, s9 ; RV32I-NEXT: neg a1, s11 ; RV32I-NEXT: srl a1, t3, a1 -; RV32I-NEXT: sll t0, a5, s9 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: sll a7, a5, s9 +; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: beqz s11, .LBB16_102 ; RV32I-NEXT: .LBB16_101: ; RV32I-NEXT: mv a5, a1 @@ -7163,7 +7123,7 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: # %bb.107: ; RV32I-NEXT: li ra, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li t0, 0 ; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: bnez a4, .LBB16_109 ; RV32I-NEXT: j .LBB16_110 @@ -7190,8 +7150,8 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: srli t1, ra, 24 ; RV32I-NEXT: srli a5, a3, 16 ; RV32I-NEXT: srli t4, a3, 24 -; RV32I-NEXT: srli t0, a7, 16 -; RV32I-NEXT: srli s0, a7, 24 +; RV32I-NEXT: srli a7, t0, 16 +; RV32I-NEXT: srli s0, t0, 24 ; RV32I-NEXT: srli t3, a6, 16 ; RV32I-NEXT: srli s3, a6, 24 ; RV32I-NEXT: srli t6, s2, 16 @@ -7210,7 +7170,7 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: sb s10, 1(a2) ; RV32I-NEXT: sb a4, 2(a2) ; RV32I-NEXT: sb t1, 3(a2) -; RV32I-NEXT: and a4, a7, t2 +; RV32I-NEXT: and a4, t0, t2 ; RV32I-NEXT: srli t1, s11, 8 ; RV32I-NEXT: sb a3, 4(a2) ; RV32I-NEXT: sb t1, 5(a2) @@ -7218,9 +7178,9 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: sb t4, 7(a2) ; RV32I-NEXT: and a3, a6, t2 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a7, 8(a2) +; RV32I-NEXT: sb t0, 8(a2) ; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb t0, 10(a2) +; RV32I-NEXT: sb a7, 10(a2) ; RV32I-NEXT: sb s0, 11(a2) ; RV32I-NEXT: and a4, s2, t2 ; RV32I-NEXT: srli a3, a3, 8 @@ -7612,54 +7572,54 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: slli t3, t3, 8 ; RV32I-NEXT: or a5, a7, a5 ; RV32I-NEXT: or a7, t1, t0 -; RV32I-NEXT: lbu t0, 0(a1) -; RV32I-NEXT: lbu t1, 1(a1) -; RV32I-NEXT: or t2, t3, t2 +; RV32I-NEXT: or t0, t3, t2 +; RV32I-NEXT: lbu t1, 0(a1) +; RV32I-NEXT: lbu t2, 1(a1) ; RV32I-NEXT: lbu t3, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 ; RV32I-NEXT: li s9, 64 ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: or a1, a1, t3 ; RV32I-NEXT: li t4, 32 ; RV32I-NEXT: slli a5, a5, 16 -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: or t3, a5, a4 -; RV32I-NEXT: or a5, t2, a7 -; RV32I-NEXT: or a4, a1, t0 +; RV32I-NEXT: or a5, t0, a7 +; RV32I-NEXT: or a4, a1, t1 ; RV32I-NEXT: slli a4, a4, 6 -; RV32I-NEXT: neg s10, a4 -; RV32I-NEXT: srl t5, t3, s10 -; RV32I-NEXT: sll s5, a5, a4 +; RV32I-NEXT: neg s5, a4 +; RV32I-NEXT: srl t5, t3, s5 +; RV32I-NEXT: sll s10, a5, a4 ; RV32I-NEXT: bltu a4, t4, .LBB17_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: li s8, 0 -; RV32I-NEXT: sll a7, t3, a4 +; RV32I-NEXT: sll t0, t3, a4 ; RV32I-NEXT: j .LBB17_3 ; RV32I-NEXT: .LBB17_2: ; RV32I-NEXT: sll s8, t3, a4 -; RV32I-NEXT: or a7, t5, s5 +; RV32I-NEXT: or t0, t5, s10 ; RV32I-NEXT: .LBB17_3: +; RV32I-NEXT: slli a6, a6, 8 ; RV32I-NEXT: lbu t2, 9(a0) -; RV32I-NEXT: lbu a1, 10(a0) +; RV32I-NEXT: lbu a7, 10(a0) ; RV32I-NEXT: lbu t1, 13(a0) -; RV32I-NEXT: lbu t0, 14(a0) -; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: lbu a1, 14(a0) ; RV32I-NEXT: slli t6, a3, 8 ; RV32I-NEXT: sub s6, s9, a4 ; RV32I-NEXT: mv a3, a5 ; 
RV32I-NEXT: beqz a4, .LBB17_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: mv a3, a7 +; RV32I-NEXT: mv a3, t0 ; RV32I-NEXT: .LBB17_5: -; RV32I-NEXT: slli a7, t2, 8 -; RV32I-NEXT: or a6, a6, a1 +; RV32I-NEXT: slli t0, t2, 8 +; RV32I-NEXT: or a6, a6, a7 ; RV32I-NEXT: lbu t2, 8(a0) -; RV32I-NEXT: lbu a1, 12(a0) +; RV32I-NEXT: lbu a7, 12(a0) ; RV32I-NEXT: slli t1, t1, 8 -; RV32I-NEXT: or t0, t6, t0 +; RV32I-NEXT: or a1, t6, a1 ; RV32I-NEXT: neg t6, s6 ; RV32I-NEXT: sw t6, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s6, t4, .LBB17_7 @@ -7670,25 +7630,25 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: sll t6, a5, t6 ; RV32I-NEXT: or t6, t5, t6 ; RV32I-NEXT: .LBB17_8: -; RV32I-NEXT: or a7, a7, t2 +; RV32I-NEXT: or t0, t0, t2 ; RV32I-NEXT: slli t2, a6, 16 -; RV32I-NEXT: or a1, t1, a1 -; RV32I-NEXT: slli t0, t0, 16 +; RV32I-NEXT: or a7, t1, a7 +; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: mv a6, t3 ; RV32I-NEXT: beqz s6, .LBB17_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a6, t6 ; RV32I-NEXT: .LBB17_10: -; RV32I-NEXT: or t1, t2, a7 -; RV32I-NEXT: or t2, t0, a1 +; RV32I-NEXT: or t1, t2, t0 +; RV32I-NEXT: or t2, a1, a7 ; RV32I-NEXT: bltu s6, t4, .LBB17_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li t0, 0 ; RV32I-NEXT: j .LBB17_13 ; RV32I-NEXT: .LBB17_12: -; RV32I-NEXT: srl a7, a5, s10 +; RV32I-NEXT: srl t0, a5, s5 ; RV32I-NEXT: .LBB17_13: -; RV32I-NEXT: srl s0, t1, s10 +; RV32I-NEXT: srl s0, t1, s5 ; RV32I-NEXT: sll a1, t2, a4 ; RV32I-NEXT: sw a1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu a4, t4, .LBB17_15 @@ -7718,7 +7678,7 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: .LBB17_20: ; RV32I-NEXT: sll s2, t3, a4 ; RV32I-NEXT: srl a1, t3, a1 -; RV32I-NEXT: or a1, a1, s5 +; RV32I-NEXT: or a1, a1, s10 ; RV32I-NEXT: mv s4, a5 ; RV32I-NEXT: beqz s7, .LBB17_22 ; RV32I-NEXT: .LBB17_21: @@ -7733,7 +7693,7 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: .LBB17_24: ; RV32I-NEXT: sw s8, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: or s2, a6, s1 -; RV32I-NEXT: or s4, a7, s3 +; RV32I-NEXT: or s4, t0, s3 ; RV32I-NEXT: .LBB17_25: ; RV32I-NEXT: sub ra, a1, a4 ; RV32I-NEXT: mv a7, t1 @@ -7748,15 +7708,15 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: bltu ra, t4, .LBB17_29 ; RV32I-NEXT: # %bb.28: ; RV32I-NEXT: srl a1, t2, ra -; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, t1 ; RV32I-NEXT: bnez ra, .LBB17_30 ; RV32I-NEXT: j .LBB17_31 ; RV32I-NEXT: .LBB17_29: ; RV32I-NEXT: or a1, s0, s2 -; RV32I-NEXT: sw t1, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, t1 ; RV32I-NEXT: beqz ra, .LBB17_31 ; RV32I-NEXT: .LBB17_30: -; RV32I-NEXT: sw a1, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, a1 ; RV32I-NEXT: .LBB17_31: ; RV32I-NEXT: bltu ra, t4, .LBB17_33 ; RV32I-NEXT: # %bb.32: @@ -7766,7 +7726,7 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: bnez ra, .LBB17_34 ; RV32I-NEXT: j .LBB17_35 ; RV32I-NEXT: .LBB17_33: -; RV32I-NEXT: srl a1, t2, s10 +; RV32I-NEXT: srl a1, t2, s5 ; RV32I-NEXT: sw a1, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: sll a1, a5, s1 ; RV32I-NEXT: or a1, t5, a1 @@ -7787,7 +7747,7 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: or a1, a1, s2 ; RV32I-NEXT: j .LBB17_40 ; RV32I-NEXT: .LBB17_38: -; RV32I-NEXT: srl a1, a5, s10 +; RV32I-NEXT: srl a1, a5, s5 ; RV32I-NEXT: 
sw a1, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s3, t4, .LBB17_37 ; RV32I-NEXT: .LBB17_39: @@ -7800,35 +7760,33 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: # %bb.41: ; RV32I-NEXT: mv s2, a1 ; RV32I-NEXT: .LBB17_42: -; RV32I-NEXT: sw s5, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s5, a7 +; RV32I-NEXT: sw t0, 40(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: bltu s4, t4, .LBB17_44 ; RV32I-NEXT: # %bb.43: -; RV32I-NEXT: srl t0, t2, s4 +; RV32I-NEXT: srl a7, t2, s4 ; RV32I-NEXT: j .LBB17_45 ; RV32I-NEXT: .LBB17_44: ; RV32I-NEXT: srl a1, t1, ra -; RV32I-NEXT: neg t0, s4 -; RV32I-NEXT: sll t0, t2, t0 -; RV32I-NEXT: or t0, a1, t0 +; RV32I-NEXT: neg a7, s4 +; RV32I-NEXT: sll a7, t2, a7 +; RV32I-NEXT: or a7, a1, a7 ; RV32I-NEXT: .LBB17_45: -; RV32I-NEXT: mv s0, s10 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: lbu s8, 19(a0) +; RV32I-NEXT: sw s10, 24(sp) # 4-byte Folded Spill +; RV32I-NEXT: li s0, 64 +; RV32I-NEXT: lbu t6, 19(a0) ; RV32I-NEXT: lbu a1, 23(a0) ; RV32I-NEXT: mv s3, t1 ; RV32I-NEXT: beqz s4, .LBB17_47 ; RV32I-NEXT: # %bb.46: -; RV32I-NEXT: mv s3, t0 +; RV32I-NEXT: mv s3, a7 ; RV32I-NEXT: .LBB17_47: -; RV32I-NEXT: mv a6, a3 -; RV32I-NEXT: lbu s10, 17(a0) -; RV32I-NEXT: lbu t0, 18(a0) +; RV32I-NEXT: slli t6, t6, 8 +; RV32I-NEXT: lbu s11, 17(a0) +; RV32I-NEXT: lbu a7, 18(a0) ; RV32I-NEXT: lbu s9, 21(a0) -; RV32I-NEXT: lbu t6, 22(a0) -; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: lbu s8, 22(a0) ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: li a3, 64 ; RV32I-NEXT: bltu s4, t4, .LBB17_49 ; RV32I-NEXT: # %bb.48: ; RV32I-NEXT: li s4, 0 @@ -7836,45 +7794,41 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: .LBB17_49: ; RV32I-NEXT: srl s4, t2, ra ; RV32I-NEXT: .LBB17_50: -; RV32I-NEXT: or s11, s8, t0 -; RV32I-NEXT: lbu t0, 16(a0) -; RV32I-NEXT: lbu s8, 20(a0) -; RV32I-NEXT: slli s10, s10, 8 +; RV32I-NEXT: slli s11, s11, 8 +; RV32I-NEXT: or s10, t6, a7 +; RV32I-NEXT: lbu a7, 16(a0) +; RV32I-NEXT: lbu t6, 20(a0) ; RV32I-NEXT: slli s9, s9, 8 -; RV32I-NEXT: or t6, a1, t6 -; RV32I-NEXT: bgeu ra, a3, .LBB17_52 +; RV32I-NEXT: or s8, a1, s8 +; RV32I-NEXT: bgeu ra, s0, .LBB17_52 ; RV32I-NEXT: # %bb.51: ; RV32I-NEXT: or s3, t5, s1 ; RV32I-NEXT: lw a1, 32(sp) # 4-byte Folded Reload ; RV32I-NEXT: or s4, a1, s2 ; RV32I-NEXT: .LBB17_52: -; RV32I-NEXT: or a1, s10, t0 -; RV32I-NEXT: slli s11, s11, 16 -; RV32I-NEXT: or t0, s9, s8 -; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: or a1, s11, a7 +; RV32I-NEXT: slli s10, s10, 16 +; RV32I-NEXT: or a7, s9, t6 +; RV32I-NEXT: slli s8, s8, 16 ; RV32I-NEXT: mv t5, t3 -; RV32I-NEXT: mv s1, a5 -; RV32I-NEXT: mv a3, a6 +; RV32I-NEXT: mv t6, a5 ; RV32I-NEXT: beqz ra, .LBB17_54 ; RV32I-NEXT: # %bb.53: ; RV32I-NEXT: mv t5, s3 -; RV32I-NEXT: mv s1, s4 +; RV32I-NEXT: mv t6, s4 ; RV32I-NEXT: .LBB17_54: -; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: or s2, s11, a1 -; RV32I-NEXT: or s1, t6, t0 +; RV32I-NEXT: or s2, s10, a1 +; RV32I-NEXT: or s1, s8, a7 ; RV32I-NEXT: li a1, 64 -; RV32I-NEXT: mv a6, a7 -; RV32I-NEXT: mv a7, s0 ; RV32I-NEXT: bltu ra, a1, .LBB17_56 ; RV32I-NEXT: # %bb.55: ; RV32I-NEXT: sw zero, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw zero, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: .LBB17_56: -; RV32I-NEXT: srl s3, s2, a7 -; RV32I-NEXT: sll ra, s1, a4 -; RV32I-NEXT: mv a7, s5 +; RV32I-NEXT: srl s3, s2, s5 +; RV32I-NEXT: sll s0, s1, a4 ; RV32I-NEXT: sw t5, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw t6, 4(sp) # 4-byte Folded Spill 
; RV32I-NEXT: bltu a4, t4, .LBB17_58 ; RV32I-NEXT: # %bb.57: ; RV32I-NEXT: sw zero, 32(sp) # 4-byte Folded Spill @@ -7883,54 +7837,54 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: .LBB17_58: ; RV32I-NEXT: sll a1, s2, a4 ; RV32I-NEXT: sw a1, 32(sp) # 4-byte Folded Spill -; RV32I-NEXT: or a1, s3, ra +; RV32I-NEXT: or a1, s3, s0 ; RV32I-NEXT: .LBB17_59: -; RV32I-NEXT: lbu s9, 27(a0) +; RV32I-NEXT: lbu s11, 27(a0) ; RV32I-NEXT: lbu t6, 31(a0) ; RV32I-NEXT: mv t5, s1 ; RV32I-NEXT: beqz a4, .LBB17_61 ; RV32I-NEXT: # %bb.60: ; RV32I-NEXT: mv t5, a1 ; RV32I-NEXT: .LBB17_61: -; RV32I-NEXT: lbu s8, 25(a0) -; RV32I-NEXT: lbu s4, 26(a0) -; RV32I-NEXT: lbu s11, 29(a0) -; RV32I-NEXT: lbu s10, 30(a0) -; RV32I-NEXT: slli s9, s9, 8 +; RV32I-NEXT: slli s11, s11, 8 +; RV32I-NEXT: lbu s9, 25(a0) +; RV32I-NEXT: lbu s8, 26(a0) +; RV32I-NEXT: lbu s10, 29(a0) +; RV32I-NEXT: lbu s4, 30(a0) ; RV32I-NEXT: slli t6, t6, 8 ; RV32I-NEXT: bltu s6, t4, .LBB17_63 ; RV32I-NEXT: # %bb.62: -; RV32I-NEXT: srl t0, s1, s6 +; RV32I-NEXT: srl a7, s1, s6 ; RV32I-NEXT: j .LBB17_64 ; RV32I-NEXT: .LBB17_63: ; RV32I-NEXT: lw a1, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: sll a1, s1, a1 -; RV32I-NEXT: or t0, s3, a1 +; RV32I-NEXT: or a7, s3, a1 ; RV32I-NEXT: .LBB17_64: -; RV32I-NEXT: slli s8, s8, 8 -; RV32I-NEXT: lbu s3, 24(a0) -; RV32I-NEXT: lbu a1, 28(a0) -; RV32I-NEXT: or s4, s9, s4 -; RV32I-NEXT: slli s11, s11, 8 -; RV32I-NEXT: or t6, t6, s10 +; RV32I-NEXT: slli s3, s9, 8 +; RV32I-NEXT: or a1, s11, s8 +; RV32I-NEXT: lbu s11, 24(a0) +; RV32I-NEXT: lbu s8, 28(a0) +; RV32I-NEXT: slli s10, s10, 8 +; RV32I-NEXT: or t6, t6, s4 ; RV32I-NEXT: mv s9, s2 ; RV32I-NEXT: beqz s6, .LBB17_66 ; RV32I-NEXT: # %bb.65: -; RV32I-NEXT: mv s9, t0 +; RV32I-NEXT: mv s9, a7 ; RV32I-NEXT: .LBB17_66: -; RV32I-NEXT: or a0, s8, s3 -; RV32I-NEXT: slli t0, s4, 16 -; RV32I-NEXT: or a1, s11, a1 +; RV32I-NEXT: or a0, s3, s11 +; RV32I-NEXT: slli a7, a1, 16 +; RV32I-NEXT: or a1, s10, s8 ; RV32I-NEXT: slli t6, t6, 16 ; RV32I-NEXT: bltu s6, t4, .LBB17_68 ; RV32I-NEXT: # %bb.67: ; RV32I-NEXT: li s4, 0 ; RV32I-NEXT: j .LBB17_69 ; RV32I-NEXT: .LBB17_68: -; RV32I-NEXT: srl s4, s1, s0 +; RV32I-NEXT: srl s4, s1, s5 ; RV32I-NEXT: .LBB17_69: ; RV32I-NEXT: li s11, 64 -; RV32I-NEXT: or s6, t0, a0 +; RV32I-NEXT: or s6, a7, a0 ; RV32I-NEXT: or a0, t6, a1 ; RV32I-NEXT: bltu a4, t4, .LBB17_71 ; RV32I-NEXT: # %bb.70: @@ -7941,9 +7895,9 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: j .LBB17_73 ; RV32I-NEXT: .LBB17_71: ; RV32I-NEXT: sll s3, s6, a4 -; RV32I-NEXT: srl a1, s6, s0 -; RV32I-NEXT: sll t0, a0, a4 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: srl a1, s6, s5 +; RV32I-NEXT: sll a7, a0, a4 +; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: mv s10, a0 ; RV32I-NEXT: beqz a4, .LBB17_73 ; RV32I-NEXT: .LBB17_72: @@ -7960,7 +7914,7 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: sll s5, s2, a4 ; RV32I-NEXT: lw a1, 16(sp) # 4-byte Folded Reload ; RV32I-NEXT: srl a1, s2, a1 -; RV32I-NEXT: or a1, a1, ra +; RV32I-NEXT: or a1, a1, s0 ; RV32I-NEXT: mv s0, s1 ; RV32I-NEXT: beqz s7, .LBB17_77 ; RV32I-NEXT: .LBB17_76: @@ -8024,8 +7978,8 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: .LBB17_93: ; RV32I-NEXT: sll s10, t1, a4 ; RV32I-NEXT: srl a1, t1, s3 -; RV32I-NEXT: lw t0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: lw a7, 20(sp) # 4-byte Folded Reload +; RV32I-NEXT: or a1, 
a1, a7 ; RV32I-NEXT: j .LBB17_96 ; RV32I-NEXT: .LBB17_94: ; RV32I-NEXT: srl s4, a5, s3 @@ -8051,8 +8005,8 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: sll t4, t3, s9 ; RV32I-NEXT: neg a1, s11 ; RV32I-NEXT: srl a1, t3, a1 -; RV32I-NEXT: sll t0, a5, s9 -; RV32I-NEXT: or a1, a1, t0 +; RV32I-NEXT: sll a7, a5, s9 +; RV32I-NEXT: or a1, a1, a7 ; RV32I-NEXT: beqz s11, .LBB17_102 ; RV32I-NEXT: .LBB17_101: ; RV32I-NEXT: mv a5, a1 @@ -8077,7 +8031,7 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: # %bb.107: ; RV32I-NEXT: li ra, 0 ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: li a7, 0 +; RV32I-NEXT: li t0, 0 ; RV32I-NEXT: li a6, 0 ; RV32I-NEXT: bnez a4, .LBB17_109 ; RV32I-NEXT: j .LBB17_110 @@ -8104,8 +8058,8 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: srli t1, ra, 24 ; RV32I-NEXT: srli a5, a3, 16 ; RV32I-NEXT: srli t4, a3, 24 -; RV32I-NEXT: srli t0, a7, 16 -; RV32I-NEXT: srli s0, a7, 24 +; RV32I-NEXT: srli a7, t0, 16 +; RV32I-NEXT: srli s0, t0, 24 ; RV32I-NEXT: srli t3, a6, 16 ; RV32I-NEXT: srli s3, a6, 24 ; RV32I-NEXT: srli t6, s2, 16 @@ -8124,7 +8078,7 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: sb s10, 1(a2) ; RV32I-NEXT: sb a4, 2(a2) ; RV32I-NEXT: sb t1, 3(a2) -; RV32I-NEXT: and a4, a7, t2 +; RV32I-NEXT: and a4, t0, t2 ; RV32I-NEXT: srli t1, s11, 8 ; RV32I-NEXT: sb a3, 4(a2) ; RV32I-NEXT: sb t1, 5(a2) @@ -8132,9 +8086,9 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: sb t4, 7(a2) ; RV32I-NEXT: and a3, a6, t2 ; RV32I-NEXT: srli a4, a4, 8 -; RV32I-NEXT: sb a7, 8(a2) +; RV32I-NEXT: sb t0, 8(a2) ; RV32I-NEXT: sb a4, 9(a2) -; RV32I-NEXT: sb t0, 10(a2) +; RV32I-NEXT: sb a7, 10(a2) ; RV32I-NEXT: sb s0, 11(a2) ; RV32I-NEXT: and a4, s2, t2 ; RV32I-NEXT: srli a3, a3, 8 @@ -8227,88 +8181,88 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: or t0, t2, t1 ; RV64I-NEXT: lbu s8, 20(a0) ; RV64I-NEXT: lbu s9, 21(a0) ; RV64I-NEXT: lbu s10, 22(a0) ; RV64I-NEXT: lbu s11, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: slli s3, s3, 8 ; RV64I-NEXT: or a4, t4, t3 ; RV64I-NEXT: or a6, t6, t5 -; RV64I-NEXT: or t0, s1, s0 -; RV64I-NEXT: lbu t5, 24(a0) -; RV64I-NEXT: lbu t6, 25(a0) -; RV64I-NEXT: lbu s0, 26(a0) -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: or t1, s1, s0 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: lbu t3, 24(a0) +; RV64I-NEXT: lbu t4, 25(a0) +; RV64I-NEXT: lbu t5, 26(a0) +; RV64I-NEXT: lbu t6, 27(a0) ; RV64I-NEXT: slli s5, s5, 8 ; RV64I-NEXT: slli s7, s7, 8 -; RV64I-NEXT: or t4, s3, s2 -; RV64I-NEXT: or t2, s5, s4 -; RV64I-NEXT: or t3, s7, s6 -; RV64I-NEXT: lbu s2, 28(a0) -; RV64I-NEXT: lbu s3, 29(a0) -; RV64I-NEXT: lbu s4, 30(a0) -; RV64I-NEXT: lbu a0, 31(a0) ; RV64I-NEXT: slli s9, s9, 8 +; RV64I-NEXT: or s0, s5, s4 +; RV64I-NEXT: or s1, s7, s6 +; RV64I-NEXT: or s2, s9, s8 +; RV64I-NEXT: lbu s3, 28(a0) +; RV64I-NEXT: lbu s4, 29(a0) +; RV64I-NEXT: lbu s5, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) ; RV64I-NEXT: slli s11, s11, 8 +; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli 
t6, t6, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s5, s9, s8 +; RV64I-NEXT: slli s4, s4, 8 ; RV64I-NEXT: or s6, s11, s10 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: or s0, s1, s0 +; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or t4, t6, t5 +; RV64I-NEXT: or t5, s4, s3 ; RV64I-NEXT: lbu t6, 0(a1) -; RV64I-NEXT: lbu s1, 1(a1) -; RV64I-NEXT: lbu s7, 2(a1) -; RV64I-NEXT: lbu s8, 3(a1) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: lbu s3, 1(a1) +; RV64I-NEXT: lbu s4, 2(a1) +; RV64I-NEXT: lbu s7, 3(a1) ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: or s3, a0, s4 -; RV64I-NEXT: or t6, s1, t6 +; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: slli s7, s7, 8 +; RV64I-NEXT: or s5, a0, s5 +; RV64I-NEXT: or t6, s3, t6 +; RV64I-NEXT: or s3, s7, s4 ; RV64I-NEXT: lbu a0, 4(a1) -; RV64I-NEXT: lbu s1, 5(a1) -; RV64I-NEXT: lbu s4, 6(a1) +; RV64I-NEXT: lbu s4, 5(a1) +; RV64I-NEXT: lbu s7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli s8, s8, 8 -; RV64I-NEXT: or s7, s8, s7 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s1, s1, a0 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: or s4, s4, a0 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or s4, a1, s4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or a1, t1, a7 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: or a0, t4, t0 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: or a7, t3, t2 +; RV64I-NEXT: or s7, a1, s7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a1, t0, a7 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: or a0, t2, t1 +; RV64I-NEXT: slli s1, s1, 16 +; RV64I-NEXT: or s0, s1, s0 ; RV64I-NEXT: slli s6, s6, 16 -; RV64I-NEXT: or t1, s6, s5 -; RV64I-NEXT: slli s0, s0, 16 -; RV64I-NEXT: or t4, s0, t5 +; RV64I-NEXT: or a7, s6, s2 +; RV64I-NEXT: slli t4, t4, 16 +; RV64I-NEXT: or t1, t4, t3 +; RV64I-NEXT: slli s5, s5, 16 +; RV64I-NEXT: or t4, s5, t5 ; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: or t5, s3, s2 +; RV64I-NEXT: or t5, s3, t6 ; RV64I-NEXT: slli s7, s7, 16 -; RV64I-NEXT: or t6, s7, t6 -; RV64I-NEXT: slli s4, s4, 16 -; RV64I-NEXT: or s0, s4, s1 +; RV64I-NEXT: or t6, s7, s4 ; RV64I-NEXT: li t0, 64 ; RV64I-NEXT: slli t3, a5, 16 ; RV64I-NEXT: slli t2, a6, 16 -; RV64I-NEXT: slli t1, t1, 32 -; RV64I-NEXT: slli t5, t5, 32 -; RV64I-NEXT: slli s0, s0, 32 -; RV64I-NEXT: or a7, t1, a7 -; RV64I-NEXT: or a5, t5, t4 -; RV64I-NEXT: or a6, s0, t6 +; RV64I-NEXT: slli a7, a7, 32 +; RV64I-NEXT: slli t4, t4, 32 +; RV64I-NEXT: slli t6, t6, 32 +; RV64I-NEXT: or a7, a7, s0 +; RV64I-NEXT: or a5, t4, t1 +; RV64I-NEXT: or a6, t6, t5 ; RV64I-NEXT: slli a6, a6, 3 ; RV64I-NEXT: subw t1, a6, t0 ; RV64I-NEXT: negw t5, a6 @@ -8522,47 +8476,47 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu t0, 21(a0) ; RV32I-NEXT: lbu t1, 22(a0) ; RV32I-NEXT: lbu t2, 23(a0) -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu t4, 25(a0) -; RV32I-NEXT: lbu t5, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) ; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 24(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: lbu t4, 26(a0) +; RV32I-NEXT: lbu t5, 27(a0) ; RV32I-NEXT: slli a6, a6, 8 ; RV32I-NEXT: slli t0, t0, 8 ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, t2, t1 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a4, t3, a4 ; RV32I-NEXT: lbu a7, 28(a0) -; RV32I-NEXT: lbu t0, 29(a0) -; RV32I-NEXT: lbu t1, 30(a0) -; RV32I-NEXT: lbu t2, 31(a0) 
-; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t3, t4, t3 -; RV32I-NEXT: or t4, t6, t5 -; RV32I-NEXT: or t0, t0, a7 +; RV32I-NEXT: lbu t1, 29(a0) +; RV32I-NEXT: lbu t2, 30(a0) +; RV32I-NEXT: lbu t3, 31(a0) +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: or t1, t1, a7 +; RV32I-NEXT: or t2, t3, t2 ; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t5, 1(a1) -; RV32I-NEXT: lbu t6, 2(a1) +; RV32I-NEXT: lbu t3, 1(a1) +; RV32I-NEXT: lbu t5, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or s0, t5, a7 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t3, t3, a7 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or t2, a1, t6 +; RV32I-NEXT: or t6, a1, t5 ; RV32I-NEXT: li t5, 32 -; RV32I-NEXT: slli a7, a4, 16 -; RV32I-NEXT: slli a1, a5, 16 +; RV32I-NEXT: slli a7, a5, 16 +; RV32I-NEXT: slli a1, t0, 16 ; RV32I-NEXT: slli t4, t4, 16 -; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: slli a5, t2, 16 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or a4, t1, t0 -; RV32I-NEXT: or a5, a5, s0 +; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: or t2, t4, a4 +; RV32I-NEXT: or a4, a5, t1 +; RV32I-NEXT: or a5, t6, t3 ; RV32I-NEXT: slli a5, a5, 3 ; RV32I-NEXT: srl s0, t2, a5 ; RV32I-NEXT: neg s6, a5 @@ -8628,6 +8582,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB18_18: ; RV32I-NEXT: neg s11, s9 ; RV32I-NEXT: sw s0, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s9, t5, .LBB18_20 ; RV32I-NEXT: # %bb.19: ; RV32I-NEXT: sra s0, a4, s9 @@ -8636,20 +8591,19 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sll a3, a4, s11 ; RV32I-NEXT: or s0, s0, a3 ; RV32I-NEXT: .LBB18_21: -; RV32I-NEXT: sw s1, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw t4, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu s3, 11(a0) +; RV32I-NEXT: lbu s1, 11(a0) ; RV32I-NEXT: lbu a3, 15(a0) ; RV32I-NEXT: mv t4, t2 ; RV32I-NEXT: beqz s9, .LBB18_23 ; RV32I-NEXT: # %bb.22: ; RV32I-NEXT: mv t4, s0 ; RV32I-NEXT: .LBB18_23: +; RV32I-NEXT: slli s1, s1, 8 ; RV32I-NEXT: lbu s2, 9(a0) -; RV32I-NEXT: lbu s1, 10(a0) +; RV32I-NEXT: lbu s3, 10(a0) ; RV32I-NEXT: lbu s8, 13(a0) ; RV32I-NEXT: lbu ra, 14(a0) -; RV32I-NEXT: slli s3, s3, 8 ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: bltu s9, t5, .LBB18_25 ; RV32I-NEXT: # %bb.24: @@ -8658,12 +8612,12 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB18_25: ; RV32I-NEXT: sra s0, a4, a5 ; RV32I-NEXT: .LBB18_26: -; RV32I-NEXT: or s1, s3, s1 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: or s1, s1, s3 ; RV32I-NEXT: lbu s5, 8(a0) ; RV32I-NEXT: lbu s3, 12(a0) -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: slli s4, s8, 8 -; RV32I-NEXT: or s8, a3, ra +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or ra, a3, ra ; RV32I-NEXT: bgeu a5, t6, .LBB18_28 ; RV32I-NEXT: # %bb.27: ; RV32I-NEXT: or t4, t0, a6 @@ -8673,8 +8627,8 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: lbu t3, 7(a0) ; RV32I-NEXT: or a6, s2, s5 ; RV32I-NEXT: slli s2, s1, 16 -; RV32I-NEXT: or s1, s4, s3 -; RV32I-NEXT: slli s8, s8, 16 +; RV32I-NEXT: or s1, s8, s3 +; RV32I-NEXT: slli ra, ra, 16 ; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: beqz a5, .LBB18_30 @@ -8682,26 +8636,26 @@ define void 
@ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: mv a1, t4 ; RV32I-NEXT: mv t0, s0 ; RV32I-NEXT: .LBB18_30: -; RV32I-NEXT: slli s5, a3, 8 -; RV32I-NEXT: lbu ra, 1(a0) -; RV32I-NEXT: lbu a3, 2(a0) +; RV32I-NEXT: slli s8, a3, 8 +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu s4, 2(a0) ; RV32I-NEXT: lbu s3, 5(a0) ; RV32I-NEXT: lbu s0, 6(a0) -; RV32I-NEXT: slli s4, t3, 8 +; RV32I-NEXT: slli s5, t3, 8 ; RV32I-NEXT: or t4, s2, a6 -; RV32I-NEXT: or t3, s8, s1 +; RV32I-NEXT: or t3, ra, s1 ; RV32I-NEXT: bltu a5, t6, .LBB18_32 ; RV32I-NEXT: # %bb.31: ; RV32I-NEXT: srai a6, a4, 31 ; RV32I-NEXT: sw a6, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw a6, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: .LBB18_32: -; RV32I-NEXT: slli a6, ra, 8 -; RV32I-NEXT: or a3, s5, a3 +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a6, s8, s4 ; RV32I-NEXT: lbu s1, 0(a0) ; RV32I-NEXT: lbu a0, 4(a0) ; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: or s0, s4, s0 +; RV32I-NEXT: or s0, s5, s0 ; RV32I-NEXT: srl s2, t4, a5 ; RV32I-NEXT: sll ra, t3, s6 ; RV32I-NEXT: bltu a5, t5, .LBB18_34 @@ -8711,8 +8665,8 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: .LBB18_34: ; RV32I-NEXT: or s4, s2, ra ; RV32I-NEXT: .LBB18_35: -; RV32I-NEXT: or a6, a6, s1 -; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: or a3, a3, s1 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: or a0, s3, a0 ; RV32I-NEXT: slli s1, s0, 16 ; RV32I-NEXT: mv s5, t4 @@ -8720,7 +8674,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: # %bb.36: ; RV32I-NEXT: mv s5, s4 ; RV32I-NEXT: .LBB18_37: -; RV32I-NEXT: or s0, a3, a6 +; RV32I-NEXT: or s0, a6, a3 ; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: bltu a5, t5, .LBB18_39 ; RV32I-NEXT: # %bb.38: @@ -9158,88 +9112,88 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: or t0, t2, t1 ; RV64I-NEXT: lbu s8, 20(a0) ; RV64I-NEXT: lbu s9, 21(a0) ; RV64I-NEXT: lbu s10, 22(a0) ; RV64I-NEXT: lbu s11, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: slli s3, s3, 8 ; RV64I-NEXT: or a4, t4, t3 ; RV64I-NEXT: or a6, t6, t5 -; RV64I-NEXT: or t0, s1, s0 -; RV64I-NEXT: lbu t5, 24(a0) -; RV64I-NEXT: lbu t6, 25(a0) -; RV64I-NEXT: lbu s0, 26(a0) -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: or t1, s1, s0 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: lbu t3, 24(a0) +; RV64I-NEXT: lbu t4, 25(a0) +; RV64I-NEXT: lbu t5, 26(a0) +; RV64I-NEXT: lbu t6, 27(a0) ; RV64I-NEXT: slli s5, s5, 8 ; RV64I-NEXT: slli s7, s7, 8 -; RV64I-NEXT: or t4, s3, s2 -; RV64I-NEXT: or t2, s5, s4 -; RV64I-NEXT: or t3, s7, s6 -; RV64I-NEXT: lbu s2, 28(a0) -; RV64I-NEXT: lbu s3, 29(a0) -; RV64I-NEXT: lbu s4, 30(a0) -; RV64I-NEXT: lbu a0, 31(a0) ; RV64I-NEXT: slli s9, s9, 8 +; RV64I-NEXT: or s0, s5, s4 +; RV64I-NEXT: or s1, s7, s6 +; RV64I-NEXT: or s2, s9, s8 +; RV64I-NEXT: lbu s3, 28(a0) +; RV64I-NEXT: lbu s4, 29(a0) +; RV64I-NEXT: lbu s5, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) ; RV64I-NEXT: slli s11, s11, 8 +; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s5, s9, s8 +; RV64I-NEXT: slli s4, s4, 8 ; RV64I-NEXT: or s6, s11, s10 -; 
RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: or s0, s1, s0 +; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or t4, t6, t5 +; RV64I-NEXT: or t5, s4, s3 ; RV64I-NEXT: lbu t6, 0(a1) -; RV64I-NEXT: lbu s1, 1(a1) -; RV64I-NEXT: lbu s7, 2(a1) -; RV64I-NEXT: lbu s8, 3(a1) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: lbu s3, 1(a1) +; RV64I-NEXT: lbu s4, 2(a1) +; RV64I-NEXT: lbu s7, 3(a1) ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: or s3, a0, s4 -; RV64I-NEXT: or t6, s1, t6 +; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: slli s7, s7, 8 +; RV64I-NEXT: or s5, a0, s5 +; RV64I-NEXT: or t6, s3, t6 +; RV64I-NEXT: or s3, s7, s4 ; RV64I-NEXT: lbu a0, 4(a1) -; RV64I-NEXT: lbu s1, 5(a1) -; RV64I-NEXT: lbu s4, 6(a1) +; RV64I-NEXT: lbu s4, 5(a1) +; RV64I-NEXT: lbu s7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli s8, s8, 8 -; RV64I-NEXT: or s7, s8, s7 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s1, s1, a0 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: or s4, s4, a0 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or s4, a1, s4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or a1, t1, a7 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: or a0, t4, t0 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: or a7, t3, t2 +; RV64I-NEXT: or s7, a1, s7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a1, t0, a7 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: or a0, t2, t1 +; RV64I-NEXT: slli s1, s1, 16 +; RV64I-NEXT: or s0, s1, s0 ; RV64I-NEXT: slli s6, s6, 16 -; RV64I-NEXT: or t1, s6, s5 -; RV64I-NEXT: slli s0, s0, 16 -; RV64I-NEXT: or t4, s0, t5 +; RV64I-NEXT: or a7, s6, s2 +; RV64I-NEXT: slli t4, t4, 16 +; RV64I-NEXT: or t1, t4, t3 +; RV64I-NEXT: slli s5, s5, 16 +; RV64I-NEXT: or t4, s5, t5 ; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: or t5, s3, s2 +; RV64I-NEXT: or t5, s3, t6 ; RV64I-NEXT: slli s7, s7, 16 -; RV64I-NEXT: or t6, s7, t6 -; RV64I-NEXT: slli s4, s4, 16 -; RV64I-NEXT: or s0, s4, s1 +; RV64I-NEXT: or t6, s7, s4 ; RV64I-NEXT: li t0, 64 ; RV64I-NEXT: slli t3, a5, 16 ; RV64I-NEXT: slli t2, a6, 16 -; RV64I-NEXT: slli t1, t1, 32 -; RV64I-NEXT: slli t5, t5, 32 -; RV64I-NEXT: slli s0, s0, 32 -; RV64I-NEXT: or a7, t1, a7 -; RV64I-NEXT: or a5, t5, t4 -; RV64I-NEXT: or a6, s0, t6 +; RV64I-NEXT: slli a7, a7, 32 +; RV64I-NEXT: slli t4, t4, 32 +; RV64I-NEXT: slli t6, t6, 32 +; RV64I-NEXT: or a7, a7, s0 +; RV64I-NEXT: or a5, t4, t1 +; RV64I-NEXT: or a6, t6, t5 ; RV64I-NEXT: slli a6, a6, 5 ; RV64I-NEXT: subw t1, a6, t0 ; RV64I-NEXT: negw t5, a6 @@ -9453,47 +9407,47 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: lbu t0, 21(a0) ; RV32I-NEXT: lbu t1, 22(a0) ; RV32I-NEXT: lbu t2, 23(a0) -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu t4, 25(a0) -; RV32I-NEXT: lbu t5, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) ; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 24(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: lbu t4, 26(a0) +; RV32I-NEXT: lbu t5, 27(a0) ; RV32I-NEXT: slli a6, a6, 8 ; RV32I-NEXT: slli t0, t0, 8 ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, t2, t1 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a4, t3, a4 ; RV32I-NEXT: lbu a7, 28(a0) -; RV32I-NEXT: lbu t0, 29(a0) -; RV32I-NEXT: lbu t1, 30(a0) -; RV32I-NEXT: lbu t2, 31(a0) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t3, t4, t3 -; 
RV32I-NEXT: or t4, t6, t5 -; RV32I-NEXT: or t0, t0, a7 +; RV32I-NEXT: lbu t1, 29(a0) +; RV32I-NEXT: lbu t2, 30(a0) +; RV32I-NEXT: lbu t3, 31(a0) +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: or t1, t1, a7 +; RV32I-NEXT: or t2, t3, t2 ; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t5, 1(a1) -; RV32I-NEXT: lbu t6, 2(a1) +; RV32I-NEXT: lbu t3, 1(a1) +; RV32I-NEXT: lbu t5, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or s0, t5, a7 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t3, t3, a7 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or t2, a1, t6 +; RV32I-NEXT: or t6, a1, t5 ; RV32I-NEXT: li t5, 32 -; RV32I-NEXT: slli a7, a4, 16 -; RV32I-NEXT: slli a1, a5, 16 +; RV32I-NEXT: slli a7, a5, 16 +; RV32I-NEXT: slli a1, t0, 16 ; RV32I-NEXT: slli t4, t4, 16 -; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: slli a5, t2, 16 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or a4, t1, t0 -; RV32I-NEXT: or a5, a5, s0 +; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: or t2, t4, a4 +; RV32I-NEXT: or a4, a5, t1 +; RV32I-NEXT: or a5, t6, t3 ; RV32I-NEXT: slli a5, a5, 5 ; RV32I-NEXT: srl s0, t2, a5 ; RV32I-NEXT: neg s6, a5 @@ -9559,6 +9513,7 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: .LBB19_18: ; RV32I-NEXT: neg s11, s9 ; RV32I-NEXT: sw s0, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s9, t5, .LBB19_20 ; RV32I-NEXT: # %bb.19: ; RV32I-NEXT: sra s0, a4, s9 @@ -9567,20 +9522,19 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: sll a3, a4, s11 ; RV32I-NEXT: or s0, s0, a3 ; RV32I-NEXT: .LBB19_21: -; RV32I-NEXT: sw s1, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw t4, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu s3, 11(a0) +; RV32I-NEXT: lbu s1, 11(a0) ; RV32I-NEXT: lbu a3, 15(a0) ; RV32I-NEXT: mv t4, t2 ; RV32I-NEXT: beqz s9, .LBB19_23 ; RV32I-NEXT: # %bb.22: ; RV32I-NEXT: mv t4, s0 ; RV32I-NEXT: .LBB19_23: +; RV32I-NEXT: slli s1, s1, 8 ; RV32I-NEXT: lbu s2, 9(a0) -; RV32I-NEXT: lbu s1, 10(a0) +; RV32I-NEXT: lbu s3, 10(a0) ; RV32I-NEXT: lbu s8, 13(a0) ; RV32I-NEXT: lbu ra, 14(a0) -; RV32I-NEXT: slli s3, s3, 8 ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: bltu s9, t5, .LBB19_25 ; RV32I-NEXT: # %bb.24: @@ -9589,12 +9543,12 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: .LBB19_25: ; RV32I-NEXT: sra s0, a4, a5 ; RV32I-NEXT: .LBB19_26: -; RV32I-NEXT: or s1, s3, s1 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: or s1, s1, s3 ; RV32I-NEXT: lbu s5, 8(a0) ; RV32I-NEXT: lbu s3, 12(a0) -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: slli s4, s8, 8 -; RV32I-NEXT: or s8, a3, ra +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or ra, a3, ra ; RV32I-NEXT: bgeu a5, t6, .LBB19_28 ; RV32I-NEXT: # %bb.27: ; RV32I-NEXT: or t4, t0, a6 @@ -9604,8 +9558,8 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: lbu t3, 7(a0) ; RV32I-NEXT: or a6, s2, s5 ; RV32I-NEXT: slli s2, s1, 16 -; RV32I-NEXT: or s1, s4, s3 -; RV32I-NEXT: slli s8, s8, 16 +; RV32I-NEXT: or s1, s8, s3 +; RV32I-NEXT: slli ra, ra, 16 ; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: beqz a5, .LBB19_30 @@ -9613,26 +9567,26 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: mv a1, t4 ; RV32I-NEXT: mv t0, s0 ; 
RV32I-NEXT: .LBB19_30: -; RV32I-NEXT: slli s5, a3, 8 -; RV32I-NEXT: lbu ra, 1(a0) -; RV32I-NEXT: lbu a3, 2(a0) +; RV32I-NEXT: slli s8, a3, 8 +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu s4, 2(a0) ; RV32I-NEXT: lbu s3, 5(a0) ; RV32I-NEXT: lbu s0, 6(a0) -; RV32I-NEXT: slli s4, t3, 8 +; RV32I-NEXT: slli s5, t3, 8 ; RV32I-NEXT: or t4, s2, a6 -; RV32I-NEXT: or t3, s8, s1 +; RV32I-NEXT: or t3, ra, s1 ; RV32I-NEXT: bltu a5, t6, .LBB19_32 ; RV32I-NEXT: # %bb.31: ; RV32I-NEXT: srai a6, a4, 31 ; RV32I-NEXT: sw a6, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw a6, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: .LBB19_32: -; RV32I-NEXT: slli a6, ra, 8 -; RV32I-NEXT: or a3, s5, a3 +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a6, s8, s4 ; RV32I-NEXT: lbu s1, 0(a0) ; RV32I-NEXT: lbu a0, 4(a0) ; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: or s0, s4, s0 +; RV32I-NEXT: or s0, s5, s0 ; RV32I-NEXT: srl s2, t4, a5 ; RV32I-NEXT: sll ra, t3, s6 ; RV32I-NEXT: bltu a5, t5, .LBB19_34 @@ -9642,8 +9596,8 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: .LBB19_34: ; RV32I-NEXT: or s4, s2, ra ; RV32I-NEXT: .LBB19_35: -; RV32I-NEXT: or a6, a6, s1 -; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: or a3, a3, s1 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: or a0, s3, a0 ; RV32I-NEXT: slli s1, s0, 16 ; RV32I-NEXT: mv s5, t4 @@ -9651,7 +9605,7 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: # %bb.36: ; RV32I-NEXT: mv s5, s4 ; RV32I-NEXT: .LBB19_37: -; RV32I-NEXT: or s0, a3, a6 +; RV32I-NEXT: or s0, a6, a3 ; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: bltu a5, t5, .LBB19_39 ; RV32I-NEXT: # %bb.38: @@ -10089,88 +10043,88 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: or t0, t2, t1 ; RV64I-NEXT: lbu s8, 20(a0) ; RV64I-NEXT: lbu s9, 21(a0) ; RV64I-NEXT: lbu s10, 22(a0) ; RV64I-NEXT: lbu s11, 23(a0) -; RV64I-NEXT: slli t2, t2, 8 ; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 ; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: slli s3, s3, 8 ; RV64I-NEXT: or a4, t4, t3 ; RV64I-NEXT: or a6, t6, t5 -; RV64I-NEXT: or t0, s1, s0 -; RV64I-NEXT: lbu t5, 24(a0) -; RV64I-NEXT: lbu t6, 25(a0) -; RV64I-NEXT: lbu s0, 26(a0) -; RV64I-NEXT: lbu s1, 27(a0) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: or t1, s1, s0 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: lbu t3, 24(a0) +; RV64I-NEXT: lbu t4, 25(a0) +; RV64I-NEXT: lbu t5, 26(a0) +; RV64I-NEXT: lbu t6, 27(a0) ; RV64I-NEXT: slli s5, s5, 8 ; RV64I-NEXT: slli s7, s7, 8 -; RV64I-NEXT: or t4, s3, s2 -; RV64I-NEXT: or t2, s5, s4 -; RV64I-NEXT: or t3, s7, s6 -; RV64I-NEXT: lbu s2, 28(a0) -; RV64I-NEXT: lbu s3, 29(a0) -; RV64I-NEXT: lbu s4, 30(a0) -; RV64I-NEXT: lbu a0, 31(a0) ; RV64I-NEXT: slli s9, s9, 8 +; RV64I-NEXT: or s0, s5, s4 +; RV64I-NEXT: or s1, s7, s6 +; RV64I-NEXT: or s2, s9, s8 +; RV64I-NEXT: lbu s3, 28(a0) +; RV64I-NEXT: lbu s4, 29(a0) +; RV64I-NEXT: lbu s5, 30(a0) +; RV64I-NEXT: lbu a0, 31(a0) ; RV64I-NEXT: slli s11, s11, 8 +; RV64I-NEXT: slli t4, t4, 8 ; RV64I-NEXT: slli t6, t6, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s5, s9, s8 +; RV64I-NEXT: slli s4, s4, 8 ; RV64I-NEXT: or s6, s11, s10 -; RV64I-NEXT: or t5, t6, t5 -; RV64I-NEXT: or s0, s1, s0 +; RV64I-NEXT: or t3, t4, t3 +; RV64I-NEXT: or t4, t6, t5 +; 
RV64I-NEXT: or t5, s4, s3 ; RV64I-NEXT: lbu t6, 0(a1) -; RV64I-NEXT: lbu s1, 1(a1) -; RV64I-NEXT: lbu s7, 2(a1) -; RV64I-NEXT: lbu s8, 3(a1) -; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: lbu s3, 1(a1) +; RV64I-NEXT: lbu s4, 2(a1) +; RV64I-NEXT: lbu s7, 3(a1) ; RV64I-NEXT: slli a0, a0, 8 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: or s3, a0, s4 -; RV64I-NEXT: or t6, s1, t6 +; RV64I-NEXT: slli s3, s3, 8 +; RV64I-NEXT: slli s7, s7, 8 +; RV64I-NEXT: or s5, a0, s5 +; RV64I-NEXT: or t6, s3, t6 +; RV64I-NEXT: or s3, s7, s4 ; RV64I-NEXT: lbu a0, 4(a1) -; RV64I-NEXT: lbu s1, 5(a1) -; RV64I-NEXT: lbu s4, 6(a1) +; RV64I-NEXT: lbu s4, 5(a1) +; RV64I-NEXT: lbu s7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) -; RV64I-NEXT: slli s8, s8, 8 -; RV64I-NEXT: or s7, s8, s7 -; RV64I-NEXT: slli s1, s1, 8 -; RV64I-NEXT: or s1, s1, a0 +; RV64I-NEXT: slli s4, s4, 8 +; RV64I-NEXT: or s4, s4, a0 ; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: or s4, a1, s4 -; RV64I-NEXT: slli t1, t1, 16 -; RV64I-NEXT: or a1, t1, a7 -; RV64I-NEXT: slli t4, t4, 16 -; RV64I-NEXT: or a0, t4, t0 -; RV64I-NEXT: slli t3, t3, 16 -; RV64I-NEXT: or a7, t3, t2 +; RV64I-NEXT: or s7, a1, s7 +; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: or a1, t0, a7 +; RV64I-NEXT: slli t2, t2, 16 +; RV64I-NEXT: or a0, t2, t1 +; RV64I-NEXT: slli s1, s1, 16 +; RV64I-NEXT: or s0, s1, s0 ; RV64I-NEXT: slli s6, s6, 16 -; RV64I-NEXT: or t1, s6, s5 -; RV64I-NEXT: slli s0, s0, 16 -; RV64I-NEXT: or t4, s0, t5 +; RV64I-NEXT: or a7, s6, s2 +; RV64I-NEXT: slli t4, t4, 16 +; RV64I-NEXT: or t1, t4, t3 +; RV64I-NEXT: slli s5, s5, 16 +; RV64I-NEXT: or t4, s5, t5 ; RV64I-NEXT: slli s3, s3, 16 -; RV64I-NEXT: or t5, s3, s2 +; RV64I-NEXT: or t5, s3, t6 ; RV64I-NEXT: slli s7, s7, 16 -; RV64I-NEXT: or t6, s7, t6 -; RV64I-NEXT: slli s4, s4, 16 -; RV64I-NEXT: or s0, s4, s1 +; RV64I-NEXT: or t6, s7, s4 ; RV64I-NEXT: li t0, 64 ; RV64I-NEXT: slli t3, a5, 16 ; RV64I-NEXT: slli t2, a6, 16 -; RV64I-NEXT: slli t1, t1, 32 -; RV64I-NEXT: slli t5, t5, 32 -; RV64I-NEXT: slli s0, s0, 32 -; RV64I-NEXT: or a7, t1, a7 -; RV64I-NEXT: or a5, t5, t4 -; RV64I-NEXT: or a6, s0, t6 +; RV64I-NEXT: slli a7, a7, 32 +; RV64I-NEXT: slli t4, t4, 32 +; RV64I-NEXT: slli t6, t6, 32 +; RV64I-NEXT: or a7, a7, s0 +; RV64I-NEXT: or a5, t4, t1 +; RV64I-NEXT: or a6, t6, t5 ; RV64I-NEXT: slli a6, a6, 6 ; RV64I-NEXT: subw t1, a6, t0 ; RV64I-NEXT: negw t5, a6 @@ -10384,47 +10338,47 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: lbu t0, 21(a0) ; RV32I-NEXT: lbu t1, 22(a0) ; RV32I-NEXT: lbu t2, 23(a0) -; RV32I-NEXT: lbu t3, 24(a0) -; RV32I-NEXT: lbu t4, 25(a0) -; RV32I-NEXT: lbu t5, 26(a0) -; RV32I-NEXT: lbu t6, 27(a0) ; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 24(a0) +; RV32I-NEXT: lbu t3, 25(a0) +; RV32I-NEXT: lbu t4, 26(a0) +; RV32I-NEXT: lbu t5, 27(a0) ; RV32I-NEXT: slli a6, a6, 8 ; RV32I-NEXT: slli t0, t0, 8 ; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or a3, a4, a3 -; RV32I-NEXT: or a4, a6, a5 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: or a6, t0, a7 -; RV32I-NEXT: or a5, t2, t1 +; RV32I-NEXT: or t0, t2, t1 +; RV32I-NEXT: or a4, t3, a4 ; RV32I-NEXT: lbu a7, 28(a0) -; RV32I-NEXT: lbu t0, 29(a0) -; RV32I-NEXT: lbu t1, 30(a0) -; RV32I-NEXT: lbu t2, 31(a0) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: slli t6, t6, 8 -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or t3, t4, t3 -; RV32I-NEXT: or t4, t6, t5 -; RV32I-NEXT: or t0, t0, a7 +; RV32I-NEXT: lbu t1, 29(a0) +; RV32I-NEXT: lbu t2, 30(a0) +; 
RV32I-NEXT: lbu t3, 31(a0) +; RV32I-NEXT: slli t5, t5, 8 +; RV32I-NEXT: slli t1, t1, 8 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t4, t5, t4 +; RV32I-NEXT: or t1, t1, a7 +; RV32I-NEXT: or t2, t3, t2 ; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t5, 1(a1) -; RV32I-NEXT: lbu t6, 2(a1) +; RV32I-NEXT: lbu t3, 1(a1) +; RV32I-NEXT: lbu t5, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t2, t2, 8 -; RV32I-NEXT: or t1, t2, t1 -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or s0, t5, a7 +; RV32I-NEXT: slli t3, t3, 8 +; RV32I-NEXT: or t3, t3, a7 ; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: or t2, a1, t6 +; RV32I-NEXT: or t6, a1, t5 ; RV32I-NEXT: li t5, 32 -; RV32I-NEXT: slli a7, a4, 16 -; RV32I-NEXT: slli a1, a5, 16 +; RV32I-NEXT: slli a7, a5, 16 +; RV32I-NEXT: slli a1, t0, 16 ; RV32I-NEXT: slli t4, t4, 16 -; RV32I-NEXT: slli t1, t1, 16 ; RV32I-NEXT: slli a5, t2, 16 -; RV32I-NEXT: or t2, t4, t3 -; RV32I-NEXT: or a4, t1, t0 -; RV32I-NEXT: or a5, a5, s0 +; RV32I-NEXT: slli t6, t6, 16 +; RV32I-NEXT: or t2, t4, a4 +; RV32I-NEXT: or a4, a5, t1 +; RV32I-NEXT: or a5, t6, t3 ; RV32I-NEXT: slli a5, a5, 6 ; RV32I-NEXT: srl s0, t2, a5 ; RV32I-NEXT: neg s6, a5 @@ -10490,6 +10444,7 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: .LBB20_18: ; RV32I-NEXT: neg s11, s9 ; RV32I-NEXT: sw s0, 32(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s1, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: bltu s9, t5, .LBB20_20 ; RV32I-NEXT: # %bb.19: ; RV32I-NEXT: sra s0, a4, s9 @@ -10498,20 +10453,19 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: sll a3, a4, s11 ; RV32I-NEXT: or s0, s0, a3 ; RV32I-NEXT: .LBB20_21: -; RV32I-NEXT: sw s1, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw t4, 36(sp) # 4-byte Folded Spill -; RV32I-NEXT: lbu s3, 11(a0) +; RV32I-NEXT: lbu s1, 11(a0) ; RV32I-NEXT: lbu a3, 15(a0) ; RV32I-NEXT: mv t4, t2 ; RV32I-NEXT: beqz s9, .LBB20_23 ; RV32I-NEXT: # %bb.22: ; RV32I-NEXT: mv t4, s0 ; RV32I-NEXT: .LBB20_23: +; RV32I-NEXT: slli s1, s1, 8 ; RV32I-NEXT: lbu s2, 9(a0) -; RV32I-NEXT: lbu s1, 10(a0) +; RV32I-NEXT: lbu s3, 10(a0) ; RV32I-NEXT: lbu s8, 13(a0) ; RV32I-NEXT: lbu ra, 14(a0) -; RV32I-NEXT: slli s3, s3, 8 ; RV32I-NEXT: slli a3, a3, 8 ; RV32I-NEXT: bltu s9, t5, .LBB20_25 ; RV32I-NEXT: # %bb.24: @@ -10520,12 +10474,12 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: .LBB20_25: ; RV32I-NEXT: sra s0, a4, a5 ; RV32I-NEXT: .LBB20_26: -; RV32I-NEXT: or s1, s3, s1 +; RV32I-NEXT: slli s2, s2, 8 +; RV32I-NEXT: or s1, s1, s3 ; RV32I-NEXT: lbu s5, 8(a0) ; RV32I-NEXT: lbu s3, 12(a0) -; RV32I-NEXT: slli s2, s2, 8 -; RV32I-NEXT: slli s4, s8, 8 -; RV32I-NEXT: or s8, a3, ra +; RV32I-NEXT: slli s8, s8, 8 +; RV32I-NEXT: or ra, a3, ra ; RV32I-NEXT: bgeu a5, t6, .LBB20_28 ; RV32I-NEXT: # %bb.27: ; RV32I-NEXT: or t4, t0, a6 @@ -10535,8 +10489,8 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: lbu t3, 7(a0) ; RV32I-NEXT: or a6, s2, s5 ; RV32I-NEXT: slli s2, s1, 16 -; RV32I-NEXT: or s1, s4, s3 -; RV32I-NEXT: slli s8, s8, 16 +; RV32I-NEXT: or s1, s8, s3 +; RV32I-NEXT: slli ra, ra, 16 ; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: mv t0, a7 ; RV32I-NEXT: beqz a5, .LBB20_30 @@ -10544,26 +10498,26 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: mv a1, t4 ; RV32I-NEXT: mv t0, s0 ; RV32I-NEXT: .LBB20_30: -; RV32I-NEXT: slli s5, a3, 8 -; RV32I-NEXT: lbu ra, 1(a0) -; RV32I-NEXT: lbu a3, 2(a0) +; 
RV32I-NEXT: slli s8, a3, 8 +; RV32I-NEXT: lbu a3, 1(a0) +; RV32I-NEXT: lbu s4, 2(a0) ; RV32I-NEXT: lbu s3, 5(a0) ; RV32I-NEXT: lbu s0, 6(a0) -; RV32I-NEXT: slli s4, t3, 8 +; RV32I-NEXT: slli s5, t3, 8 ; RV32I-NEXT: or t4, s2, a6 -; RV32I-NEXT: or t3, s8, s1 +; RV32I-NEXT: or t3, ra, s1 ; RV32I-NEXT: bltu a5, t6, .LBB20_32 ; RV32I-NEXT: # %bb.31: ; RV32I-NEXT: srai a6, a4, 31 ; RV32I-NEXT: sw a6, 40(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw a6, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: .LBB20_32: -; RV32I-NEXT: slli a6, ra, 8 -; RV32I-NEXT: or a3, s5, a3 +; RV32I-NEXT: slli a3, a3, 8 +; RV32I-NEXT: or a6, s8, s4 ; RV32I-NEXT: lbu s1, 0(a0) ; RV32I-NEXT: lbu a0, 4(a0) ; RV32I-NEXT: slli s3, s3, 8 -; RV32I-NEXT: or s0, s4, s0 +; RV32I-NEXT: or s0, s5, s0 ; RV32I-NEXT: srl s2, t4, a5 ; RV32I-NEXT: sll ra, t3, s6 ; RV32I-NEXT: bltu a5, t5, .LBB20_34 @@ -10573,8 +10527,8 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: .LBB20_34: ; RV32I-NEXT: or s4, s2, ra ; RV32I-NEXT: .LBB20_35: -; RV32I-NEXT: or a6, a6, s1 -; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: or a3, a3, s1 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: or a0, s3, a0 ; RV32I-NEXT: slli s1, s0, 16 ; RV32I-NEXT: mv s5, t4 @@ -10582,7 +10536,7 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: # %bb.36: ; RV32I-NEXT: mv s5, s4 ; RV32I-NEXT: .LBB20_37: -; RV32I-NEXT: or s0, a3, a6 +; RV32I-NEXT: or s0, a6, a3 ; RV32I-NEXT: or a0, s1, a0 ; RV32I-NEXT: bltu a5, t5, .LBB20_39 ; RV32I-NEXT: # %bb.38: diff --git a/llvm/test/CodeGen/RISCV/abds-neg.ll b/llvm/test/CodeGen/RISCV/abds-neg.ll index c9a48acb8d14a..d7290e1e65540 100644 --- a/llvm/test/CodeGen/RISCV/abds-neg.ll +++ b/llvm/test/CodeGen/RISCV/abds-neg.ll @@ -625,42 +625,42 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw t1, 12(a1) +; RV32I-NEXT: lw a1, 0(a2) +; RV32I-NEXT: lw a5, 4(a2) ; RV32I-NEXT: lw t0, 8(a2) ; RV32I-NEXT: lw t2, 12(a2) -; RV32I-NEXT: lw a1, 0(a2) -; RV32I-NEXT: lw a2, 4(a2) ; RV32I-NEXT: sltu t3, t0, a6 -; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: mv t5, t3 ; RV32I-NEXT: beq t1, t2, .LBB11_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t4, t2, t1 +; RV32I-NEXT: slt t5, t2, t1 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sltu a5, a1, a3 -; RV32I-NEXT: sltu t6, a2, a4 -; RV32I-NEXT: mv a7, a5 -; RV32I-NEXT: beq a4, a2, .LBB11_4 +; RV32I-NEXT: sltu t4, a5, a4 +; RV32I-NEXT: sltu a2, a1, a3 +; RV32I-NEXT: mv a7, a2 +; RV32I-NEXT: beq a4, a5, .LBB11_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: mv a7, t6 +; RV32I-NEXT: mv a7, t4 ; RV32I-NEXT: .LBB11_4: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: xor t5, t1, t2 +; RV32I-NEXT: xor t6, t1, t2 ; RV32I-NEXT: xor s0, a6, t0 -; RV32I-NEXT: or t5, s0, t5 -; RV32I-NEXT: beqz t5, .LBB11_6 +; RV32I-NEXT: or t6, s0, t6 +; RV32I-NEXT: beqz t6, .LBB11_6 ; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: mv a7, t4 +; RV32I-NEXT: mv a7, t5 ; RV32I-NEXT: .LBB11_6: -; RV32I-NEXT: mv t5, a5 -; RV32I-NEXT: beq a2, a4, .LBB11_8 +; RV32I-NEXT: mv t5, a2 +; RV32I-NEXT: beq a5, a4, .LBB11_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: mv t5, t6 +; RV32I-NEXT: mv t5, t4 ; RV32I-NEXT: .LBB11_8: ; RV32I-NEXT: sltu t4, a3, a1 ; RV32I-NEXT: mv t6, t4 -; RV32I-NEXT: beq a4, a2, .LBB11_10 +; RV32I-NEXT: beq a4, a5, .LBB11_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t6, a4, a2 +; RV32I-NEXT: sltu t6, a4, a5 ; RV32I-NEXT: .LBB11_10: ; RV32I-NEXT: bnez a7, .LBB11_12 
; RV32I-NEXT: # %bb.11: @@ -684,12 +684,12 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: add t0, t0, t1 ; RV32I-NEXT: bnez a7, .LBB11_15 ; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: sub a2, a2, a4 -; RV32I-NEXT: sub a2, a2, a5 +; RV32I-NEXT: sub a5, a5, a4 +; RV32I-NEXT: sub a2, a5, a2 ; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: j .LBB11_16 ; RV32I-NEXT: .LBB11_15: -; RV32I-NEXT: sub a4, a4, a2 +; RV32I-NEXT: sub a4, a4, a5 ; RV32I-NEXT: sub a2, a4, t4 ; RV32I-NEXT: sub a1, a3, a1 ; RV32I-NEXT: .LBB11_16: @@ -744,42 +744,42 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) ; RV32ZBB-NEXT: lw t1, 12(a1) +; RV32ZBB-NEXT: lw a1, 0(a2) +; RV32ZBB-NEXT: lw a5, 4(a2) ; RV32ZBB-NEXT: lw t0, 8(a2) ; RV32ZBB-NEXT: lw t2, 12(a2) -; RV32ZBB-NEXT: lw a1, 0(a2) -; RV32ZBB-NEXT: lw a2, 4(a2) ; RV32ZBB-NEXT: sltu t3, t0, a6 -; RV32ZBB-NEXT: mv t4, t3 +; RV32ZBB-NEXT: mv t5, t3 ; RV32ZBB-NEXT: beq t1, t2, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t4, t2, t1 +; RV32ZBB-NEXT: slt t5, t2, t1 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: sltu a5, a1, a3 -; RV32ZBB-NEXT: sltu t6, a2, a4 -; RV32ZBB-NEXT: mv a7, a5 -; RV32ZBB-NEXT: beq a4, a2, .LBB11_4 +; RV32ZBB-NEXT: sltu t4, a5, a4 +; RV32ZBB-NEXT: sltu a2, a1, a3 +; RV32ZBB-NEXT: mv a7, a2 +; RV32ZBB-NEXT: beq a4, a5, .LBB11_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: mv a7, t6 +; RV32ZBB-NEXT: mv a7, t4 ; RV32ZBB-NEXT: .LBB11_4: ; RV32ZBB-NEXT: addi sp, sp, -16 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBB-NEXT: xor t5, t1, t2 +; RV32ZBB-NEXT: xor t6, t1, t2 ; RV32ZBB-NEXT: xor s0, a6, t0 -; RV32ZBB-NEXT: or t5, s0, t5 -; RV32ZBB-NEXT: beqz t5, .LBB11_6 +; RV32ZBB-NEXT: or t6, s0, t6 +; RV32ZBB-NEXT: beqz t6, .LBB11_6 ; RV32ZBB-NEXT: # %bb.5: -; RV32ZBB-NEXT: mv a7, t4 +; RV32ZBB-NEXT: mv a7, t5 ; RV32ZBB-NEXT: .LBB11_6: -; RV32ZBB-NEXT: mv t5, a5 -; RV32ZBB-NEXT: beq a2, a4, .LBB11_8 +; RV32ZBB-NEXT: mv t5, a2 +; RV32ZBB-NEXT: beq a5, a4, .LBB11_8 ; RV32ZBB-NEXT: # %bb.7: -; RV32ZBB-NEXT: mv t5, t6 +; RV32ZBB-NEXT: mv t5, t4 ; RV32ZBB-NEXT: .LBB11_8: ; RV32ZBB-NEXT: sltu t4, a3, a1 ; RV32ZBB-NEXT: mv t6, t4 -; RV32ZBB-NEXT: beq a4, a2, .LBB11_10 +; RV32ZBB-NEXT: beq a4, a5, .LBB11_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t6, a4, a2 +; RV32ZBB-NEXT: sltu t6, a4, a5 ; RV32ZBB-NEXT: .LBB11_10: ; RV32ZBB-NEXT: bnez a7, .LBB11_12 ; RV32ZBB-NEXT: # %bb.11: @@ -803,12 +803,12 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: add t0, t0, t1 ; RV32ZBB-NEXT: bnez a7, .LBB11_15 ; RV32ZBB-NEXT: # %bb.14: -; RV32ZBB-NEXT: sub a2, a2, a4 -; RV32ZBB-NEXT: sub a2, a2, a5 +; RV32ZBB-NEXT: sub a5, a5, a4 +; RV32ZBB-NEXT: sub a2, a5, a2 ; RV32ZBB-NEXT: sub a1, a1, a3 ; RV32ZBB-NEXT: j .LBB11_16 ; RV32ZBB-NEXT: .LBB11_15: -; RV32ZBB-NEXT: sub a4, a4, a2 +; RV32ZBB-NEXT: sub a4, a4, a5 ; RV32ZBB-NEXT: sub a2, a4, t4 ; RV32ZBB-NEXT: sub a1, a3, a1 ; RV32ZBB-NEXT: .LBB11_16: @@ -872,42 +872,42 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw t1, 12(a1) +; RV32I-NEXT: lw a1, 0(a2) +; RV32I-NEXT: lw a5, 4(a2) ; RV32I-NEXT: lw t0, 8(a2) ; RV32I-NEXT: lw t2, 12(a2) -; RV32I-NEXT: lw a1, 0(a2) -; RV32I-NEXT: lw a2, 4(a2) ; RV32I-NEXT: sltu t3, t0, a6 -; RV32I-NEXT: mv t4, t3 +; RV32I-NEXT: mv t5, t3 ; RV32I-NEXT: beq t1, t2, .LBB12_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t4, t2, t1 +; RV32I-NEXT: slt t5, t2, t1 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: 
sltu a5, a1, a3 -; RV32I-NEXT: sltu t6, a2, a4 -; RV32I-NEXT: mv a7, a5 -; RV32I-NEXT: beq a4, a2, .LBB12_4 +; RV32I-NEXT: sltu t4, a5, a4 +; RV32I-NEXT: sltu a2, a1, a3 +; RV32I-NEXT: mv a7, a2 +; RV32I-NEXT: beq a4, a5, .LBB12_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: mv a7, t6 +; RV32I-NEXT: mv a7, t4 ; RV32I-NEXT: .LBB12_4: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: xor t5, t1, t2 +; RV32I-NEXT: xor t6, t1, t2 ; RV32I-NEXT: xor s0, a6, t0 -; RV32I-NEXT: or t5, s0, t5 -; RV32I-NEXT: beqz t5, .LBB12_6 +; RV32I-NEXT: or t6, s0, t6 +; RV32I-NEXT: beqz t6, .LBB12_6 ; RV32I-NEXT: # %bb.5: -; RV32I-NEXT: mv a7, t4 +; RV32I-NEXT: mv a7, t5 ; RV32I-NEXT: .LBB12_6: -; RV32I-NEXT: mv t5, a5 -; RV32I-NEXT: beq a2, a4, .LBB12_8 +; RV32I-NEXT: mv t5, a2 +; RV32I-NEXT: beq a5, a4, .LBB12_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: mv t5, t6 +; RV32I-NEXT: mv t5, t4 ; RV32I-NEXT: .LBB12_8: ; RV32I-NEXT: sltu t4, a3, a1 ; RV32I-NEXT: mv t6, t4 -; RV32I-NEXT: beq a4, a2, .LBB12_10 +; RV32I-NEXT: beq a4, a5, .LBB12_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t6, a4, a2 +; RV32I-NEXT: sltu t6, a4, a5 ; RV32I-NEXT: .LBB12_10: ; RV32I-NEXT: bnez a7, .LBB12_12 ; RV32I-NEXT: # %bb.11: @@ -931,12 +931,12 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: add t0, t0, t1 ; RV32I-NEXT: bnez a7, .LBB12_15 ; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: sub a2, a2, a4 -; RV32I-NEXT: sub a2, a2, a5 +; RV32I-NEXT: sub a5, a5, a4 +; RV32I-NEXT: sub a2, a5, a2 ; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: j .LBB12_16 ; RV32I-NEXT: .LBB12_15: -; RV32I-NEXT: sub a4, a4, a2 +; RV32I-NEXT: sub a4, a4, a5 ; RV32I-NEXT: sub a2, a4, t4 ; RV32I-NEXT: sub a1, a3, a1 ; RV32I-NEXT: .LBB12_16: @@ -991,42 +991,42 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) ; RV32ZBB-NEXT: lw t1, 12(a1) +; RV32ZBB-NEXT: lw a1, 0(a2) +; RV32ZBB-NEXT: lw a5, 4(a2) ; RV32ZBB-NEXT: lw t0, 8(a2) ; RV32ZBB-NEXT: lw t2, 12(a2) -; RV32ZBB-NEXT: lw a1, 0(a2) -; RV32ZBB-NEXT: lw a2, 4(a2) ; RV32ZBB-NEXT: sltu t3, t0, a6 -; RV32ZBB-NEXT: mv t4, t3 +; RV32ZBB-NEXT: mv t5, t3 ; RV32ZBB-NEXT: beq t1, t2, .LBB12_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t4, t2, t1 +; RV32ZBB-NEXT: slt t5, t2, t1 ; RV32ZBB-NEXT: .LBB12_2: -; RV32ZBB-NEXT: sltu a5, a1, a3 -; RV32ZBB-NEXT: sltu t6, a2, a4 -; RV32ZBB-NEXT: mv a7, a5 -; RV32ZBB-NEXT: beq a4, a2, .LBB12_4 +; RV32ZBB-NEXT: sltu t4, a5, a4 +; RV32ZBB-NEXT: sltu a2, a1, a3 +; RV32ZBB-NEXT: mv a7, a2 +; RV32ZBB-NEXT: beq a4, a5, .LBB12_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: mv a7, t6 +; RV32ZBB-NEXT: mv a7, t4 ; RV32ZBB-NEXT: .LBB12_4: ; RV32ZBB-NEXT: addi sp, sp, -16 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBB-NEXT: xor t5, t1, t2 +; RV32ZBB-NEXT: xor t6, t1, t2 ; RV32ZBB-NEXT: xor s0, a6, t0 -; RV32ZBB-NEXT: or t5, s0, t5 -; RV32ZBB-NEXT: beqz t5, .LBB12_6 +; RV32ZBB-NEXT: or t6, s0, t6 +; RV32ZBB-NEXT: beqz t6, .LBB12_6 ; RV32ZBB-NEXT: # %bb.5: -; RV32ZBB-NEXT: mv a7, t4 +; RV32ZBB-NEXT: mv a7, t5 ; RV32ZBB-NEXT: .LBB12_6: -; RV32ZBB-NEXT: mv t5, a5 -; RV32ZBB-NEXT: beq a2, a4, .LBB12_8 +; RV32ZBB-NEXT: mv t5, a2 +; RV32ZBB-NEXT: beq a5, a4, .LBB12_8 ; RV32ZBB-NEXT: # %bb.7: -; RV32ZBB-NEXT: mv t5, t6 +; RV32ZBB-NEXT: mv t5, t4 ; RV32ZBB-NEXT: .LBB12_8: ; RV32ZBB-NEXT: sltu t4, a3, a1 ; RV32ZBB-NEXT: mv t6, t4 -; RV32ZBB-NEXT: beq a4, a2, .LBB12_10 +; RV32ZBB-NEXT: beq a4, a5, .LBB12_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t6, a4, a2 +; 
RV32ZBB-NEXT: sltu t6, a4, a5 ; RV32ZBB-NEXT: .LBB12_10: ; RV32ZBB-NEXT: bnez a7, .LBB12_12 ; RV32ZBB-NEXT: # %bb.11: @@ -1050,12 +1050,12 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: add t0, t0, t1 ; RV32ZBB-NEXT: bnez a7, .LBB12_15 ; RV32ZBB-NEXT: # %bb.14: -; RV32ZBB-NEXT: sub a2, a2, a4 -; RV32ZBB-NEXT: sub a2, a2, a5 +; RV32ZBB-NEXT: sub a5, a5, a4 +; RV32ZBB-NEXT: sub a2, a5, a2 ; RV32ZBB-NEXT: sub a1, a1, a3 ; RV32ZBB-NEXT: j .LBB12_16 ; RV32ZBB-NEXT: .LBB12_15: -; RV32ZBB-NEXT: sub a4, a4, a2 +; RV32ZBB-NEXT: sub a4, a4, a5 ; RV32ZBB-NEXT: sub a2, a4, t4 ; RV32ZBB-NEXT: sub a1, a3, a1 ; RV32ZBB-NEXT: .LBB12_16: @@ -1382,30 +1382,30 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_minmax_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a6, 4(a2) -; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw a5, 4(a2) +; RV32I-NEXT: lw a6, 8(a2) ; RV32I-NEXT: lw t0, 12(a2) -; RV32I-NEXT: lw a5, 12(a1) ; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: beq a5, t0, .LBB17_2 +; RV32I-NEXT: lw a7, 12(a1) +; RV32I-NEXT: beq a7, t0, .LBB17_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t1, a5, t0 +; RV32I-NEXT: slt t1, a7, t0 ; RV32I-NEXT: j .LBB17_3 ; RV32I-NEXT: .LBB17_2: -; RV32I-NEXT: sltu t1, a4, a7 +; RV32I-NEXT: sltu t1, a4, a6 ; RV32I-NEXT: .LBB17_3: ; RV32I-NEXT: lw t2, 0(a2) ; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: beq a3, a6, .LBB17_5 +; RV32I-NEXT: beq a3, a5, .LBB17_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sltu t6, a3, a6 +; RV32I-NEXT: sltu t6, a3, a5 ; RV32I-NEXT: j .LBB17_6 ; RV32I-NEXT: .LBB17_5: ; RV32I-NEXT: sltu t6, a1, t2 ; RV32I-NEXT: .LBB17_6: -; RV32I-NEXT: xor a2, a5, t0 -; RV32I-NEXT: xor t3, a4, a7 +; RV32I-NEXT: xor a2, a7, t0 +; RV32I-NEXT: xor t3, a4, a6 ; RV32I-NEXT: or t5, t3, a2 ; RV32I-NEXT: beqz t5, .LBB17_8 ; RV32I-NEXT: # %bb.7: @@ -1413,27 +1413,27 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: .LBB17_8: ; RV32I-NEXT: mv a2, a1 ; RV32I-NEXT: mv t1, a3 -; RV32I-NEXT: mv t4, a5 +; RV32I-NEXT: mv t4, a7 ; RV32I-NEXT: mv t3, a4 ; RV32I-NEXT: bnez t6, .LBB17_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a2, t2 -; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: mv t1, a5 ; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: mv t3, a7 +; RV32I-NEXT: mv t3, a6 ; RV32I-NEXT: .LBB17_10: -; RV32I-NEXT: beq a5, t0, .LBB17_12 +; RV32I-NEXT: beq a7, t0, .LBB17_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: slt t6, t0, a5 +; RV32I-NEXT: slt t6, t0, a7 ; RV32I-NEXT: j .LBB17_13 ; RV32I-NEXT: .LBB17_12: -; RV32I-NEXT: sltu t6, a7, a4 +; RV32I-NEXT: sltu t6, a6, a4 ; RV32I-NEXT: .LBB17_13: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: beq a3, a6, .LBB17_15 +; RV32I-NEXT: beq a3, a5, .LBB17_15 ; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: sltu s0, a6, a3 +; RV32I-NEXT: sltu s0, a5, a3 ; RV32I-NEXT: bnez t5, .LBB17_16 ; RV32I-NEXT: j .LBB17_17 ; RV32I-NEXT: .LBB17_15: @@ -1445,14 +1445,14 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: bnez s0, .LBB17_19 ; RV32I-NEXT: # %bb.18: ; RV32I-NEXT: mv a1, t2 -; RV32I-NEXT: mv a3, a6 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a7, t0 +; RV32I-NEXT: mv a4, a6 ; RV32I-NEXT: .LBB17_19: -; RV32I-NEXT: sltu a7, t3, a4 -; RV32I-NEXT: sub a5, t4, a5 +; RV32I-NEXT: sltu a5, t3, a4 +; RV32I-NEXT: sub a6, t4, a7 +; RV32I-NEXT: sub a5, a6, a5 ; RV32I-NEXT: sltu a6, a2, a1 -; RV32I-NEXT: sub a5, a5, a7 ; RV32I-NEXT: mv 
a7, a6 ; RV32I-NEXT: beq t1, a3, .LBB17_21 ; RV32I-NEXT: # %bb.20: @@ -1509,30 +1509,30 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_minmax_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a6, 4(a2) -; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw a5, 4(a2) +; RV32ZBB-NEXT: lw a6, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a2) -; RV32ZBB-NEXT: lw a5, 12(a1) ; RV32ZBB-NEXT: lw a3, 4(a1) ; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: beq a5, t0, .LBB17_2 +; RV32ZBB-NEXT: lw a7, 12(a1) +; RV32ZBB-NEXT: beq a7, t0, .LBB17_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t1, a5, t0 +; RV32ZBB-NEXT: slt t1, a7, t0 ; RV32ZBB-NEXT: j .LBB17_3 ; RV32ZBB-NEXT: .LBB17_2: -; RV32ZBB-NEXT: sltu t1, a4, a7 +; RV32ZBB-NEXT: sltu t1, a4, a6 ; RV32ZBB-NEXT: .LBB17_3: ; RV32ZBB-NEXT: lw t2, 0(a2) ; RV32ZBB-NEXT: lw a1, 0(a1) -; RV32ZBB-NEXT: beq a3, a6, .LBB17_5 +; RV32ZBB-NEXT: beq a3, a5, .LBB17_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: sltu t6, a3, a6 +; RV32ZBB-NEXT: sltu t6, a3, a5 ; RV32ZBB-NEXT: j .LBB17_6 ; RV32ZBB-NEXT: .LBB17_5: ; RV32ZBB-NEXT: sltu t6, a1, t2 ; RV32ZBB-NEXT: .LBB17_6: -; RV32ZBB-NEXT: xor a2, a5, t0 -; RV32ZBB-NEXT: xor t3, a4, a7 +; RV32ZBB-NEXT: xor a2, a7, t0 +; RV32ZBB-NEXT: xor t3, a4, a6 ; RV32ZBB-NEXT: or t5, t3, a2 ; RV32ZBB-NEXT: beqz t5, .LBB17_8 ; RV32ZBB-NEXT: # %bb.7: @@ -1540,27 +1540,27 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: .LBB17_8: ; RV32ZBB-NEXT: mv a2, a1 ; RV32ZBB-NEXT: mv t1, a3 -; RV32ZBB-NEXT: mv t4, a5 +; RV32ZBB-NEXT: mv t4, a7 ; RV32ZBB-NEXT: mv t3, a4 ; RV32ZBB-NEXT: bnez t6, .LBB17_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a2, t2 -; RV32ZBB-NEXT: mv t1, a6 +; RV32ZBB-NEXT: mv t1, a5 ; RV32ZBB-NEXT: mv t4, t0 -; RV32ZBB-NEXT: mv t3, a7 +; RV32ZBB-NEXT: mv t3, a6 ; RV32ZBB-NEXT: .LBB17_10: -; RV32ZBB-NEXT: beq a5, t0, .LBB17_12 +; RV32ZBB-NEXT: beq a7, t0, .LBB17_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: slt t6, t0, a5 +; RV32ZBB-NEXT: slt t6, t0, a7 ; RV32ZBB-NEXT: j .LBB17_13 ; RV32ZBB-NEXT: .LBB17_12: -; RV32ZBB-NEXT: sltu t6, a7, a4 +; RV32ZBB-NEXT: sltu t6, a6, a4 ; RV32ZBB-NEXT: .LBB17_13: ; RV32ZBB-NEXT: addi sp, sp, -16 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBB-NEXT: beq a3, a6, .LBB17_15 +; RV32ZBB-NEXT: beq a3, a5, .LBB17_15 ; RV32ZBB-NEXT: # %bb.14: -; RV32ZBB-NEXT: sltu s0, a6, a3 +; RV32ZBB-NEXT: sltu s0, a5, a3 ; RV32ZBB-NEXT: bnez t5, .LBB17_16 ; RV32ZBB-NEXT: j .LBB17_17 ; RV32ZBB-NEXT: .LBB17_15: @@ -1572,14 +1572,14 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: bnez s0, .LBB17_19 ; RV32ZBB-NEXT: # %bb.18: ; RV32ZBB-NEXT: mv a1, t2 -; RV32ZBB-NEXT: mv a3, a6 -; RV32ZBB-NEXT: mv a5, t0 -; RV32ZBB-NEXT: mv a4, a7 +; RV32ZBB-NEXT: mv a3, a5 +; RV32ZBB-NEXT: mv a7, t0 +; RV32ZBB-NEXT: mv a4, a6 ; RV32ZBB-NEXT: .LBB17_19: -; RV32ZBB-NEXT: sltu a7, t3, a4 -; RV32ZBB-NEXT: sub a5, t4, a5 +; RV32ZBB-NEXT: sltu a5, t3, a4 +; RV32ZBB-NEXT: sub a6, t4, a7 +; RV32ZBB-NEXT: sub a5, a6, a5 ; RV32ZBB-NEXT: sltu a6, a2, a1 -; RV32ZBB-NEXT: sub a5, a5, a7 ; RV32ZBB-NEXT: mv a7, a6 ; RV32ZBB-NEXT: beq t1, a3, .LBB17_21 ; RV32ZBB-NEXT: # %bb.20: @@ -1862,26 +1862,26 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) ; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a7, 12(a2) -; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw t0, 12(a2) ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: 
sltu t1, a6, a5 -; RV32I-NEXT: mv t4, t1 -; RV32I-NEXT: beq t0, a7, .LBB22_2 +; RV32I-NEXT: lw a5, 4(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t1, 12(a1) +; RV32I-NEXT: sltu a1, a7, a6 +; RV32I-NEXT: mv t4, a1 +; RV32I-NEXT: beq t1, t0, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t4, t0, a7 +; RV32I-NEXT: slt t4, t1, t0 ; RV32I-NEXT: .LBB22_2: ; RV32I-NEXT: sltu t2, a2, a3 ; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq a1, a4, .LBB22_4 +; RV32I-NEXT: beq a5, a4, .LBB22_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t3, a1, a4 +; RV32I-NEXT: sltu t3, a5, a4 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: xor t5, t0, a7 -; RV32I-NEXT: xor t6, a6, a5 +; RV32I-NEXT: xor t5, t1, t0 +; RV32I-NEXT: xor t6, a7, a6 ; RV32I-NEXT: or t5, t6, t5 ; RV32I-NEXT: mv t6, t3 ; RV32I-NEXT: beqz t5, .LBB22_6 @@ -1890,32 +1890,32 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: .LBB22_6: ; RV32I-NEXT: sltu t4, a3, a2 ; RV32I-NEXT: mv t5, t4 -; RV32I-NEXT: beq a1, a4, .LBB22_8 +; RV32I-NEXT: beq a5, a4, .LBB22_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: sltu t5, a4, a1 +; RV32I-NEXT: sltu t5, a4, a5 ; RV32I-NEXT: .LBB22_8: ; RV32I-NEXT: bnez t6, .LBB22_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t1, a5, a6 -; RV32I-NEXT: sub a7, a7, t0 -; RV32I-NEXT: sub a5, a5, a6 -; RV32I-NEXT: sub a4, a4, a1 -; RV32I-NEXT: sub a6, a7, t1 -; RV32I-NEXT: sltu a7, a5, t5 -; RV32I-NEXT: sub a1, a5, t5 +; RV32I-NEXT: sltu a1, a6, a7 +; RV32I-NEXT: sub t0, t0, t1 +; RV32I-NEXT: sub a6, a6, a7 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: sub a7, t0, a1 +; RV32I-NEXT: sltu t0, a6, t5 +; RV32I-NEXT: sub a1, a6, t5 ; RV32I-NEXT: sub a5, a4, t4 -; RV32I-NEXT: sub a4, a6, a7 +; RV32I-NEXT: sub a4, a7, t0 ; RV32I-NEXT: sub a2, a3, a2 ; RV32I-NEXT: j .LBB22_11 ; RV32I-NEXT: .LBB22_10: -; RV32I-NEXT: sub a7, t0, a7 -; RV32I-NEXT: sub a5, a6, a5 -; RV32I-NEXT: sub a4, a1, a4 -; RV32I-NEXT: sub a6, a7, t1 -; RV32I-NEXT: sltu a7, a5, t3 -; RV32I-NEXT: sub a1, a5, t3 -; RV32I-NEXT: sub a5, a4, t2 -; RV32I-NEXT: sub a4, a6, a7 +; RV32I-NEXT: sub t0, t1, t0 +; RV32I-NEXT: sub a6, a7, a6 +; RV32I-NEXT: sub a5, a5, a4 +; RV32I-NEXT: sub a4, t0, a1 +; RV32I-NEXT: sltu a7, a6, t3 +; RV32I-NEXT: sub a1, a6, t3 +; RV32I-NEXT: sub a5, a5, t2 +; RV32I-NEXT: sub a4, a4, a7 ; RV32I-NEXT: sub a2, a2, a3 ; RV32I-NEXT: .LBB22_11: ; RV32I-NEXT: sw a2, 0(a0) @@ -1949,26 +1949,26 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) ; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a7, 12(a2) -; RV32ZBB-NEXT: lw a6, 8(a1) -; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw t0, 12(a2) ; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a6, a5 -; RV32ZBB-NEXT: mv t4, t1 -; RV32ZBB-NEXT: beq t0, a7, .LBB22_2 +; RV32ZBB-NEXT: lw a5, 4(a1) +; RV32ZBB-NEXT: lw a7, 8(a1) +; RV32ZBB-NEXT: lw t1, 12(a1) +; RV32ZBB-NEXT: sltu a1, a7, a6 +; RV32ZBB-NEXT: mv t4, a1 +; RV32ZBB-NEXT: beq t1, t0, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: slt t4, t0, a7 +; RV32ZBB-NEXT: slt t4, t1, t0 ; RV32ZBB-NEXT: .LBB22_2: ; RV32ZBB-NEXT: sltu t2, a2, a3 ; RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a1, a4, .LBB22_4 +; RV32ZBB-NEXT: beq a5, a4, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t3, a1, a4 +; RV32ZBB-NEXT: sltu t3, a5, a4 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: xor t5, t0, a7 -; RV32ZBB-NEXT: xor t6, a6, a5 +; RV32ZBB-NEXT: xor t5, t1, t0 +; RV32ZBB-NEXT: xor t6, a7, a6 ; RV32ZBB-NEXT: or t5, 
t6, t5 ; RV32ZBB-NEXT: mv t6, t3 ; RV32ZBB-NEXT: beqz t5, .LBB22_6 @@ -1977,32 +1977,32 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: .LBB22_6: ; RV32ZBB-NEXT: sltu t4, a3, a2 ; RV32ZBB-NEXT: mv t5, t4 -; RV32ZBB-NEXT: beq a1, a4, .LBB22_8 +; RV32ZBB-NEXT: beq a5, a4, .LBB22_8 ; RV32ZBB-NEXT: # %bb.7: -; RV32ZBB-NEXT: sltu t5, a4, a1 +; RV32ZBB-NEXT: sltu t5, a4, a5 ; RV32ZBB-NEXT: .LBB22_8: ; RV32ZBB-NEXT: bnez t6, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t1, a5, a6 -; RV32ZBB-NEXT: sub a7, a7, t0 -; RV32ZBB-NEXT: sub a5, a5, a6 -; RV32ZBB-NEXT: sub a4, a4, a1 -; RV32ZBB-NEXT: sub a6, a7, t1 -; RV32ZBB-NEXT: sltu a7, a5, t5 -; RV32ZBB-NEXT: sub a1, a5, t5 +; RV32ZBB-NEXT: sltu a1, a6, a7 +; RV32ZBB-NEXT: sub t0, t0, t1 +; RV32ZBB-NEXT: sub a6, a6, a7 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: sub a7, t0, a1 +; RV32ZBB-NEXT: sltu t0, a6, t5 +; RV32ZBB-NEXT: sub a1, a6, t5 ; RV32ZBB-NEXT: sub a5, a4, t4 -; RV32ZBB-NEXT: sub a4, a6, a7 +; RV32ZBB-NEXT: sub a4, a7, t0 ; RV32ZBB-NEXT: sub a2, a3, a2 ; RV32ZBB-NEXT: j .LBB22_11 ; RV32ZBB-NEXT: .LBB22_10: -; RV32ZBB-NEXT: sub a7, t0, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 -; RV32ZBB-NEXT: sub a4, a1, a4 -; RV32ZBB-NEXT: sub a6, a7, t1 -; RV32ZBB-NEXT: sltu a7, a5, t3 -; RV32ZBB-NEXT: sub a1, a5, t3 -; RV32ZBB-NEXT: sub a5, a4, t2 -; RV32ZBB-NEXT: sub a4, a6, a7 +; RV32ZBB-NEXT: sub t0, t1, t0 +; RV32ZBB-NEXT: sub a6, a7, a6 +; RV32ZBB-NEXT: sub a5, a5, a4 +; RV32ZBB-NEXT: sub a4, t0, a1 +; RV32ZBB-NEXT: sltu a7, a6, t3 +; RV32ZBB-NEXT: sub a1, a6, t3 +; RV32ZBB-NEXT: sub a5, a5, t2 +; RV32ZBB-NEXT: sub a4, a4, a7 ; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: .LBB22_11: ; RV32ZBB-NEXT: sw a2, 0(a0) @@ -2391,53 +2391,53 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) ; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a6, 12(a2) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw t1, 12(a1) +; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a7, 12(a2) ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a7, 4(a1) -; RV32I-NEXT: sltu a1, t0, a5 -; RV32I-NEXT: sub t1, t1, a6 -; RV32I-NEXT: sltu a6, a2, a3 -; RV32I-NEXT: sub a1, t1, a1 -; RV32I-NEXT: mv t1, a6 -; RV32I-NEXT: beq a7, a4, .LBB31_2 +; RV32I-NEXT: lw a5, 4(a1) +; RV32I-NEXT: lw t1, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sltu t0, t1, a6 +; RV32I-NEXT: sub a1, a1, a7 +; RV32I-NEXT: sub a1, a1, t0 +; RV32I-NEXT: sltu a7, a2, a3 +; RV32I-NEXT: mv t0, a7 +; RV32I-NEXT: beq a5, a4, .LBB31_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a7, a4 +; RV32I-NEXT: sltu t0, a5, a4 ; RV32I-NEXT: .LBB31_2: -; RV32I-NEXT: sub a5, t0, a5 -; RV32I-NEXT: sub a4, a7, a4 -; RV32I-NEXT: sub a3, a2, a3 -; RV32I-NEXT: sltu a2, a5, t1 -; RV32I-NEXT: sub t0, a4, a6 -; RV32I-NEXT: sub a4, a5, t1 -; RV32I-NEXT: sub a5, a1, a2 +; RV32I-NEXT: sub a6, t1, a6 +; RV32I-NEXT: sub a5, a5, a4 +; RV32I-NEXT: sub a2, a2, a3 +; RV32I-NEXT: sltu a3, a6, t0 +; RV32I-NEXT: sub t1, a5, a7 +; RV32I-NEXT: sub a4, a6, t0 +; RV32I-NEXT: sub a5, a1, a3 ; RV32I-NEXT: srai a1, a5, 31 -; RV32I-NEXT: xor a2, a4, a1 -; RV32I-NEXT: xor a5, a5, a1 -; RV32I-NEXT: xor a4, a3, a1 -; RV32I-NEXT: sltu a3, a1, a2 -; RV32I-NEXT: sub a6, a1, a5 -; RV32I-NEXT: sltu a5, a1, a4 -; RV32I-NEXT: sub a3, a6, a3 -; RV32I-NEXT: xor a7, t0, a1 -; RV32I-NEXT: mv a6, a5 -; RV32I-NEXT: beqz t0, .LBB31_4 +; RV32I-NEXT: xor a3, a4, a1 +; RV32I-NEXT: xor a6, a5, a1 +; RV32I-NEXT: xor a5, t1, a1 +; RV32I-NEXT: xor a4, a2, a1 +; RV32I-NEXT: sltu a2, 
a1, a3 +; RV32I-NEXT: sub a6, a1, a6 +; RV32I-NEXT: sub a2, a6, a2 +; RV32I-NEXT: sltu a6, a1, a4 +; RV32I-NEXT: mv a7, a6 +; RV32I-NEXT: beqz t1, .LBB31_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu a6, a1, a7 +; RV32I-NEXT: sltu a7, a1, a5 ; RV32I-NEXT: .LBB31_4: -; RV32I-NEXT: sub a2, a1, a2 -; RV32I-NEXT: sub a7, a1, a7 +; RV32I-NEXT: sub a3, a1, a3 +; RV32I-NEXT: sub a5, a1, a5 ; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sltu a4, a2, a6 -; RV32I-NEXT: sub a2, a2, a6 -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sltu a4, a3, a7 +; RV32I-NEXT: sub a3, a3, a7 +; RV32I-NEXT: sub a5, a5, a6 +; RV32I-NEXT: sub a2, a2, a4 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_subnsw_i128: @@ -2459,53 +2459,53 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) ; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a6, 12(a2) -; RV32ZBB-NEXT: lw t0, 8(a1) -; RV32ZBB-NEXT: lw t1, 12(a1) +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) ; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a7, 4(a1) -; RV32ZBB-NEXT: sltu a1, t0, a5 -; RV32ZBB-NEXT: sub t1, t1, a6 -; RV32ZBB-NEXT: sltu a6, a2, a3 -; RV32ZBB-NEXT: sub a1, t1, a1 -; RV32ZBB-NEXT: mv t1, a6 -; RV32ZBB-NEXT: beq a7, a4, .LBB31_2 +; RV32ZBB-NEXT: lw a5, 4(a1) +; RV32ZBB-NEXT: lw t1, 8(a1) +; RV32ZBB-NEXT: lw a1, 12(a1) +; RV32ZBB-NEXT: sltu t0, t1, a6 +; RV32ZBB-NEXT: sub a1, a1, a7 +; RV32ZBB-NEXT: sub a1, a1, t0 +; RV32ZBB-NEXT: sltu a7, a2, a3 +; RV32ZBB-NEXT: mv t0, a7 +; RV32ZBB-NEXT: beq a5, a4, .LBB31_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a7, a4 +; RV32ZBB-NEXT: sltu t0, a5, a4 ; RV32ZBB-NEXT: .LBB31_2: -; RV32ZBB-NEXT: sub a5, t0, a5 -; RV32ZBB-NEXT: sub a4, a7, a4 -; RV32ZBB-NEXT: sub a3, a2, a3 -; RV32ZBB-NEXT: sltu a2, a5, t1 -; RV32ZBB-NEXT: sub t0, a4, a6 -; RV32ZBB-NEXT: sub a4, a5, t1 -; RV32ZBB-NEXT: sub a5, a1, a2 +; RV32ZBB-NEXT: sub a6, t1, a6 +; RV32ZBB-NEXT: sub a5, a5, a4 +; RV32ZBB-NEXT: sub a2, a2, a3 +; RV32ZBB-NEXT: sltu a3, a6, t0 +; RV32ZBB-NEXT: sub t1, a5, a7 +; RV32ZBB-NEXT: sub a4, a6, t0 +; RV32ZBB-NEXT: sub a5, a1, a3 ; RV32ZBB-NEXT: srai a1, a5, 31 -; RV32ZBB-NEXT: xor a2, a4, a1 -; RV32ZBB-NEXT: xor a5, a5, a1 -; RV32ZBB-NEXT: xor a4, a3, a1 -; RV32ZBB-NEXT: sltu a3, a1, a2 -; RV32ZBB-NEXT: sub a6, a1, a5 -; RV32ZBB-NEXT: sltu a5, a1, a4 -; RV32ZBB-NEXT: sub a3, a6, a3 -; RV32ZBB-NEXT: xor a7, t0, a1 -; RV32ZBB-NEXT: mv a6, a5 -; RV32ZBB-NEXT: beqz t0, .LBB31_4 +; RV32ZBB-NEXT: xor a3, a4, a1 +; RV32ZBB-NEXT: xor a6, a5, a1 +; RV32ZBB-NEXT: xor a5, t1, a1 +; RV32ZBB-NEXT: xor a4, a2, a1 +; RV32ZBB-NEXT: sltu a2, a1, a3 +; RV32ZBB-NEXT: sub a6, a1, a6 +; RV32ZBB-NEXT: sub a2, a6, a2 +; RV32ZBB-NEXT: sltu a6, a1, a4 +; RV32ZBB-NEXT: mv a7, a6 +; RV32ZBB-NEXT: beqz t1, .LBB31_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu a6, a1, a7 +; RV32ZBB-NEXT: sltu a7, a1, a5 ; RV32ZBB-NEXT: .LBB31_4: -; RV32ZBB-NEXT: sub a2, a1, a2 -; RV32ZBB-NEXT: sub a7, a1, a7 +; RV32ZBB-NEXT: sub a3, a1, a3 +; RV32ZBB-NEXT: sub a5, a1, a5 ; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sltu a4, a2, a6 -; RV32ZBB-NEXT: sub a2, a2, a6 -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sltu a4, a3, a7 +; RV32ZBB-NEXT: sub a3, a3, a7 +; RV32ZBB-NEXT: sub a5, a5, a6 +; RV32ZBB-NEXT: sub a2, a2, a4 ; RV32ZBB-NEXT: sw a1, 
0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_subnsw_i128: @@ -2533,53 +2533,53 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) ; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a6, 12(a2) -; RV32I-NEXT: lw t0, 8(a1) -; RV32I-NEXT: lw t1, 12(a1) +; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a7, 12(a2) ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a7, 4(a1) -; RV32I-NEXT: sltu a1, t0, a5 -; RV32I-NEXT: sub t1, t1, a6 -; RV32I-NEXT: sltu a6, a2, a3 -; RV32I-NEXT: sub a1, t1, a1 -; RV32I-NEXT: mv t1, a6 -; RV32I-NEXT: beq a7, a4, .LBB32_2 +; RV32I-NEXT: lw a5, 4(a1) +; RV32I-NEXT: lw t1, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sltu t0, t1, a6 +; RV32I-NEXT: sub a1, a1, a7 +; RV32I-NEXT: sub a1, a1, t0 +; RV32I-NEXT: sltu a7, a2, a3 +; RV32I-NEXT: mv t0, a7 +; RV32I-NEXT: beq a5, a4, .LBB32_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a7, a4 +; RV32I-NEXT: sltu t0, a5, a4 ; RV32I-NEXT: .LBB32_2: -; RV32I-NEXT: sub a5, t0, a5 -; RV32I-NEXT: sub a4, a7, a4 -; RV32I-NEXT: sub a3, a2, a3 -; RV32I-NEXT: sltu a2, a5, t1 -; RV32I-NEXT: sub t0, a4, a6 -; RV32I-NEXT: sub a4, a5, t1 -; RV32I-NEXT: sub a5, a1, a2 +; RV32I-NEXT: sub a6, t1, a6 +; RV32I-NEXT: sub a5, a5, a4 +; RV32I-NEXT: sub a2, a2, a3 +; RV32I-NEXT: sltu a3, a6, t0 +; RV32I-NEXT: sub t1, a5, a7 +; RV32I-NEXT: sub a4, a6, t0 +; RV32I-NEXT: sub a5, a1, a3 ; RV32I-NEXT: srai a1, a5, 31 -; RV32I-NEXT: xor a2, a4, a1 -; RV32I-NEXT: xor a5, a5, a1 -; RV32I-NEXT: xor a4, a3, a1 -; RV32I-NEXT: sltu a3, a1, a2 -; RV32I-NEXT: sub a6, a1, a5 -; RV32I-NEXT: sltu a5, a1, a4 -; RV32I-NEXT: sub a3, a6, a3 -; RV32I-NEXT: xor a7, t0, a1 -; RV32I-NEXT: mv a6, a5 -; RV32I-NEXT: beqz t0, .LBB32_4 +; RV32I-NEXT: xor a3, a4, a1 +; RV32I-NEXT: xor a6, a5, a1 +; RV32I-NEXT: xor a5, t1, a1 +; RV32I-NEXT: xor a4, a2, a1 +; RV32I-NEXT: sltu a2, a1, a3 +; RV32I-NEXT: sub a6, a1, a6 +; RV32I-NEXT: sub a2, a6, a2 +; RV32I-NEXT: sltu a6, a1, a4 +; RV32I-NEXT: mv a7, a6 +; RV32I-NEXT: beqz t1, .LBB32_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu a6, a1, a7 +; RV32I-NEXT: sltu a7, a1, a5 ; RV32I-NEXT: .LBB32_4: -; RV32I-NEXT: sub a2, a1, a2 -; RV32I-NEXT: sub a7, a1, a7 +; RV32I-NEXT: sub a3, a1, a3 +; RV32I-NEXT: sub a5, a1, a5 ; RV32I-NEXT: sub a1, a1, a4 -; RV32I-NEXT: sltu a4, a2, a6 -; RV32I-NEXT: sub a2, a2, a6 -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sltu a4, a3, a7 +; RV32I-NEXT: sub a3, a3, a7 +; RV32I-NEXT: sub a5, a5, a6 +; RV32I-NEXT: sub a2, a2, a4 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_subnsw_i128_undef: @@ -2601,53 +2601,53 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) ; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a6, 12(a2) -; RV32ZBB-NEXT: lw t0, 8(a1) -; RV32ZBB-NEXT: lw t1, 12(a1) +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) ; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a7, 4(a1) -; RV32ZBB-NEXT: sltu a1, t0, a5 -; RV32ZBB-NEXT: sub t1, t1, a6 -; RV32ZBB-NEXT: sltu a6, a2, a3 -; RV32ZBB-NEXT: sub a1, t1, a1 -; RV32ZBB-NEXT: mv t1, a6 -; RV32ZBB-NEXT: beq a7, a4, .LBB32_2 +; 
RV32ZBB-NEXT: lw a5, 4(a1) +; RV32ZBB-NEXT: lw t1, 8(a1) +; RV32ZBB-NEXT: lw a1, 12(a1) +; RV32ZBB-NEXT: sltu t0, t1, a6 +; RV32ZBB-NEXT: sub a1, a1, a7 +; RV32ZBB-NEXT: sub a1, a1, t0 +; RV32ZBB-NEXT: sltu a7, a2, a3 +; RV32ZBB-NEXT: mv t0, a7 +; RV32ZBB-NEXT: beq a5, a4, .LBB32_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a7, a4 +; RV32ZBB-NEXT: sltu t0, a5, a4 ; RV32ZBB-NEXT: .LBB32_2: -; RV32ZBB-NEXT: sub a5, t0, a5 -; RV32ZBB-NEXT: sub a4, a7, a4 -; RV32ZBB-NEXT: sub a3, a2, a3 -; RV32ZBB-NEXT: sltu a2, a5, t1 -; RV32ZBB-NEXT: sub t0, a4, a6 -; RV32ZBB-NEXT: sub a4, a5, t1 -; RV32ZBB-NEXT: sub a5, a1, a2 +; RV32ZBB-NEXT: sub a6, t1, a6 +; RV32ZBB-NEXT: sub a5, a5, a4 +; RV32ZBB-NEXT: sub a2, a2, a3 +; RV32ZBB-NEXT: sltu a3, a6, t0 +; RV32ZBB-NEXT: sub t1, a5, a7 +; RV32ZBB-NEXT: sub a4, a6, t0 +; RV32ZBB-NEXT: sub a5, a1, a3 ; RV32ZBB-NEXT: srai a1, a5, 31 -; RV32ZBB-NEXT: xor a2, a4, a1 -; RV32ZBB-NEXT: xor a5, a5, a1 -; RV32ZBB-NEXT: xor a4, a3, a1 -; RV32ZBB-NEXT: sltu a3, a1, a2 -; RV32ZBB-NEXT: sub a6, a1, a5 -; RV32ZBB-NEXT: sltu a5, a1, a4 -; RV32ZBB-NEXT: sub a3, a6, a3 -; RV32ZBB-NEXT: xor a7, t0, a1 -; RV32ZBB-NEXT: mv a6, a5 -; RV32ZBB-NEXT: beqz t0, .LBB32_4 +; RV32ZBB-NEXT: xor a3, a4, a1 +; RV32ZBB-NEXT: xor a6, a5, a1 +; RV32ZBB-NEXT: xor a5, t1, a1 +; RV32ZBB-NEXT: xor a4, a2, a1 +; RV32ZBB-NEXT: sltu a2, a1, a3 +; RV32ZBB-NEXT: sub a6, a1, a6 +; RV32ZBB-NEXT: sub a2, a6, a2 +; RV32ZBB-NEXT: sltu a6, a1, a4 +; RV32ZBB-NEXT: mv a7, a6 +; RV32ZBB-NEXT: beqz t1, .LBB32_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu a6, a1, a7 +; RV32ZBB-NEXT: sltu a7, a1, a5 ; RV32ZBB-NEXT: .LBB32_4: -; RV32ZBB-NEXT: sub a2, a1, a2 -; RV32ZBB-NEXT: sub a7, a1, a7 +; RV32ZBB-NEXT: sub a3, a1, a3 +; RV32ZBB-NEXT: sub a5, a1, a5 ; RV32ZBB-NEXT: sub a1, a1, a4 -; RV32ZBB-NEXT: sltu a4, a2, a6 -; RV32ZBB-NEXT: sub a2, a2, a6 -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sltu a4, a3, a7 +; RV32ZBB-NEXT: sub a3, a3, a7 +; RV32ZBB-NEXT: sub a5, a5, a6 +; RV32ZBB-NEXT: sub a2, a2, a4 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a3, 8(a0) +; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_subnsw_i128_undef: diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll index 56e6dacff9748..9e866220af666 100644 --- a/llvm/test/CodeGen/RISCV/abds.ll +++ b/llvm/test/CodeGen/RISCV/abds.ll @@ -538,18 +538,18 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a7, 8(a2) -; RV32I-NEXT: lw t1, 12(a2) ; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw t1, 12(a2) ; RV32I-NEXT: sltu a2, a7, a6 ; RV32I-NEXT: mv t4, a2 ; RV32I-NEXT: beq t0, t1, .LBB11_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: slt t4, t1, t0 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sltu t2, a5, a3 ; RV32I-NEXT: sltu t5, a1, a4 +; RV32I-NEXT: sltu t2, a5, a3 ; RV32I-NEXT: mv t3, t2 ; RV32I-NEXT: beq a4, a1, .LBB11_4 ; RV32I-NEXT: # %bb.3: @@ -634,18 +634,18 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) -; RV32ZBB-NEXT: lw a7, 8(a2) -; RV32ZBB-NEXT: lw t1, 12(a2) ; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) ; RV32ZBB-NEXT: sltu a2, a7, a6 ; RV32ZBB-NEXT: 
mv t4, a2 ; RV32ZBB-NEXT: beq t0, t1, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: slt t4, t1, t0 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: sltu t2, a5, a3 ; RV32ZBB-NEXT: sltu t5, a1, a4 +; RV32ZBB-NEXT: sltu t2, a5, a3 ; RV32ZBB-NEXT: mv t3, t2 ; RV32ZBB-NEXT: beq a4, a1, .LBB11_4 ; RV32ZBB-NEXT: # %bb.3: @@ -738,18 +738,18 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a7, 8(a2) -; RV32I-NEXT: lw t1, 12(a2) ; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw t1, 12(a2) ; RV32I-NEXT: sltu a2, a7, a6 ; RV32I-NEXT: mv t4, a2 ; RV32I-NEXT: beq t0, t1, .LBB12_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: slt t4, t1, t0 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: sltu t2, a5, a3 ; RV32I-NEXT: sltu t5, a1, a4 +; RV32I-NEXT: sltu t2, a5, a3 ; RV32I-NEXT: mv t3, t2 ; RV32I-NEXT: beq a4, a1, .LBB12_4 ; RV32I-NEXT: # %bb.3: @@ -834,18 +834,18 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) -; RV32ZBB-NEXT: lw a7, 8(a2) -; RV32ZBB-NEXT: lw t1, 12(a2) ; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) ; RV32ZBB-NEXT: sltu a2, a7, a6 ; RV32ZBB-NEXT: mv t4, a2 ; RV32ZBB-NEXT: beq t0, t1, .LBB12_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: slt t4, t1, t0 ; RV32ZBB-NEXT: .LBB12_2: -; RV32ZBB-NEXT: sltu t2, a5, a3 ; RV32ZBB-NEXT: sltu t5, a1, a4 +; RV32ZBB-NEXT: sltu t2, a5, a3 ; RV32ZBB-NEXT: mv t3, t2 ; RV32ZBB-NEXT: beq a4, a1, .LBB12_4 ; RV32ZBB-NEXT: # %bb.3: @@ -1127,18 +1127,18 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a7, 8(a2) -; RV32I-NEXT: lw t1, 12(a2) ; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw t1, 12(a2) ; RV32I-NEXT: sltu a2, a7, a6 ; RV32I-NEXT: mv t4, a2 ; RV32I-NEXT: beq t0, t1, .LBB17_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: slt t4, t1, t0 ; RV32I-NEXT: .LBB17_2: -; RV32I-NEXT: sltu t2, a5, a3 ; RV32I-NEXT: sltu t5, a1, a4 +; RV32I-NEXT: sltu t2, a5, a3 ; RV32I-NEXT: mv t3, t2 ; RV32I-NEXT: beq a4, a1, .LBB17_4 ; RV32I-NEXT: # %bb.3: @@ -1223,18 +1223,18 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) -; RV32ZBB-NEXT: lw a7, 8(a2) -; RV32ZBB-NEXT: lw t1, 12(a2) ; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) ; RV32ZBB-NEXT: sltu a2, a7, a6 ; RV32ZBB-NEXT: mv t4, a2 ; RV32ZBB-NEXT: beq t0, t1, .LBB17_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: slt t4, t1, t0 ; RV32ZBB-NEXT: .LBB17_2: -; RV32ZBB-NEXT: sltu t2, a5, a3 ; RV32ZBB-NEXT: sltu t5, a1, a4 +; RV32ZBB-NEXT: sltu t2, a5, a3 ; RV32ZBB-NEXT: mv t3, t2 ; RV32ZBB-NEXT: beq a4, a1, .LBB17_4 ; RV32ZBB-NEXT: # %bb.3: @@ -1518,18 +1518,18 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a7, 8(a2) -; RV32I-NEXT: lw t1, 12(a2) ; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw t1, 12(a2) ; RV32I-NEXT: sltu a2, a7, a6 ; RV32I-NEXT: mv t4, a2 ; RV32I-NEXT: beq t0, t1, .LBB22_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: slt t4, t1, t0 
; RV32I-NEXT: .LBB22_2: -; RV32I-NEXT: sltu t2, a5, a3 ; RV32I-NEXT: sltu t5, a1, a4 +; RV32I-NEXT: sltu t2, a5, a3 ; RV32I-NEXT: mv t3, t2 ; RV32I-NEXT: beq a4, a1, .LBB22_4 ; RV32I-NEXT: # %bb.3: @@ -1614,18 +1614,18 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) -; RV32ZBB-NEXT: lw a7, 8(a2) -; RV32ZBB-NEXT: lw t1, 12(a2) ; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) ; RV32ZBB-NEXT: sltu a2, a7, a6 ; RV32ZBB-NEXT: mv t4, a2 ; RV32ZBB-NEXT: beq t0, t1, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: slt t4, t1, t0 ; RV32ZBB-NEXT: .LBB22_2: -; RV32ZBB-NEXT: sltu t2, a5, a3 ; RV32ZBB-NEXT: sltu t5, a1, a4 +; RV32ZBB-NEXT: sltu t2, a5, a3 ; RV32ZBB-NEXT: mv t3, t2 ; RV32ZBB-NEXT: beq a4, a1, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: @@ -2045,27 +2045,27 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lw a4, 0(a2) ; RV32I-NEXT: lw a3, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a6, 12(a2) -; RV32I-NEXT: lw a7, 8(a1) -; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a7, 12(a2) ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a7, a5 -; RV32I-NEXT: sub t0, t0, a6 -; RV32I-NEXT: sltu a6, a2, a4 -; RV32I-NEXT: sub t0, t0, t1 -; RV32I-NEXT: mv t1, a6 -; RV32I-NEXT: beq a1, a3, .LBB31_2 +; RV32I-NEXT: lw a5, 4(a1) +; RV32I-NEXT: lw t0, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sltu t1, t0, a6 +; RV32I-NEXT: sub a1, a1, a7 +; RV32I-NEXT: sub a1, a1, t1 +; RV32I-NEXT: sltu a7, a2, a4 +; RV32I-NEXT: mv t1, a7 +; RV32I-NEXT: beq a5, a3, .LBB31_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a3 +; RV32I-NEXT: sltu t1, a5, a3 ; RV32I-NEXT: .LBB31_2: -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: sub a3, a1, a3 -; RV32I-NEXT: sltu a1, a5, t1 -; RV32I-NEXT: sub a5, a5, t1 -; RV32I-NEXT: sub a1, t0, a1 -; RV32I-NEXT: sub a3, a3, a6 +; RV32I-NEXT: sub a6, t0, a6 +; RV32I-NEXT: sub a3, a5, a3 +; RV32I-NEXT: sltu t0, a6, t1 +; RV32I-NEXT: sub a5, a6, t1 +; RV32I-NEXT: sub a1, a1, t0 +; RV32I-NEXT: sub a3, a3, a7 ; RV32I-NEXT: sub a2, a2, a4 ; RV32I-NEXT: bgez a1, .LBB31_4 ; RV32I-NEXT: # %bb.3: @@ -2108,27 +2108,27 @@ define i128 @abd_subnsw_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a4, 0(a2) ; RV32ZBB-NEXT: lw a3, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a6, 12(a2) -; RV32ZBB-NEXT: lw a7, 8(a1) -; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) ; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a7, a5 -; RV32ZBB-NEXT: sub t0, t0, a6 -; RV32ZBB-NEXT: sltu a6, a2, a4 -; RV32ZBB-NEXT: sub t0, t0, t1 -; RV32ZBB-NEXT: mv t1, a6 -; RV32ZBB-NEXT: beq a1, a3, .LBB31_2 +; RV32ZBB-NEXT: lw a5, 4(a1) +; RV32ZBB-NEXT: lw t0, 8(a1) +; RV32ZBB-NEXT: lw a1, 12(a1) +; RV32ZBB-NEXT: sltu t1, t0, a6 +; RV32ZBB-NEXT: sub a1, a1, a7 +; RV32ZBB-NEXT: sub a1, a1, t1 +; RV32ZBB-NEXT: sltu a7, a2, a4 +; RV32ZBB-NEXT: mv t1, a7 +; RV32ZBB-NEXT: beq a5, a3, .LBB31_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a3 +; RV32ZBB-NEXT: sltu t1, a5, a3 ; RV32ZBB-NEXT: .LBB31_2: -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: sub a3, a1, a3 -; RV32ZBB-NEXT: sltu a1, a5, t1 -; RV32ZBB-NEXT: sub a5, a5, t1 -; RV32ZBB-NEXT: sub a1, t0, a1 -; RV32ZBB-NEXT: sub a3, a3, a6 +; RV32ZBB-NEXT: sub a6, t0, a6 +; RV32ZBB-NEXT: sub a3, a5, a3 +; 
RV32ZBB-NEXT: sltu t0, a6, t1 +; RV32ZBB-NEXT: sub a5, a6, t1 +; RV32ZBB-NEXT: sub a1, a1, t0 +; RV32ZBB-NEXT: sub a3, a3, a7 ; RV32ZBB-NEXT: sub a2, a2, a4 ; RV32ZBB-NEXT: bgez a1, .LBB31_4 ; RV32ZBB-NEXT: # %bb.3: @@ -2176,27 +2176,27 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lw a4, 0(a2) ; RV32I-NEXT: lw a3, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a6, 12(a2) -; RV32I-NEXT: lw a7, 8(a1) -; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a7, 12(a2) ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a7, a5 -; RV32I-NEXT: sub t0, t0, a6 -; RV32I-NEXT: sltu a6, a2, a4 -; RV32I-NEXT: sub t0, t0, t1 -; RV32I-NEXT: mv t1, a6 -; RV32I-NEXT: beq a1, a3, .LBB32_2 +; RV32I-NEXT: lw a5, 4(a1) +; RV32I-NEXT: lw t0, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sltu t1, t0, a6 +; RV32I-NEXT: sub a1, a1, a7 +; RV32I-NEXT: sub a1, a1, t1 +; RV32I-NEXT: sltu a7, a2, a4 +; RV32I-NEXT: mv t1, a7 +; RV32I-NEXT: beq a5, a3, .LBB32_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a3 +; RV32I-NEXT: sltu t1, a5, a3 ; RV32I-NEXT: .LBB32_2: -; RV32I-NEXT: sub a5, a7, a5 -; RV32I-NEXT: sub a3, a1, a3 -; RV32I-NEXT: sltu a1, a5, t1 -; RV32I-NEXT: sub a5, a5, t1 -; RV32I-NEXT: sub a1, t0, a1 -; RV32I-NEXT: sub a3, a3, a6 +; RV32I-NEXT: sub a6, t0, a6 +; RV32I-NEXT: sub a3, a5, a3 +; RV32I-NEXT: sltu t0, a6, t1 +; RV32I-NEXT: sub a5, a6, t1 +; RV32I-NEXT: sub a1, a1, t0 +; RV32I-NEXT: sub a3, a3, a7 ; RV32I-NEXT: sub a2, a2, a4 ; RV32I-NEXT: bgez a1, .LBB32_4 ; RV32I-NEXT: # %bb.3: @@ -2239,27 +2239,27 @@ define i128 @abd_subnsw_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a4, 0(a2) ; RV32ZBB-NEXT: lw a3, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a6, 12(a2) -; RV32ZBB-NEXT: lw a7, 8(a1) -; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a7, 12(a2) ; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a7, a5 -; RV32ZBB-NEXT: sub t0, t0, a6 -; RV32ZBB-NEXT: sltu a6, a2, a4 -; RV32ZBB-NEXT: sub t0, t0, t1 -; RV32ZBB-NEXT: mv t1, a6 -; RV32ZBB-NEXT: beq a1, a3, .LBB32_2 +; RV32ZBB-NEXT: lw a5, 4(a1) +; RV32ZBB-NEXT: lw t0, 8(a1) +; RV32ZBB-NEXT: lw a1, 12(a1) +; RV32ZBB-NEXT: sltu t1, t0, a6 +; RV32ZBB-NEXT: sub a1, a1, a7 +; RV32ZBB-NEXT: sub a1, a1, t1 +; RV32ZBB-NEXT: sltu a7, a2, a4 +; RV32ZBB-NEXT: mv t1, a7 +; RV32ZBB-NEXT: beq a5, a3, .LBB32_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a3 +; RV32ZBB-NEXT: sltu t1, a5, a3 ; RV32ZBB-NEXT: .LBB32_2: -; RV32ZBB-NEXT: sub a5, a7, a5 -; RV32ZBB-NEXT: sub a3, a1, a3 -; RV32ZBB-NEXT: sltu a1, a5, t1 -; RV32ZBB-NEXT: sub a5, a5, t1 -; RV32ZBB-NEXT: sub a1, t0, a1 -; RV32ZBB-NEXT: sub a3, a3, a6 +; RV32ZBB-NEXT: sub a6, t0, a6 +; RV32ZBB-NEXT: sub a3, a5, a3 +; RV32ZBB-NEXT: sltu t0, a6, t1 +; RV32ZBB-NEXT: sub a5, a6, t1 +; RV32ZBB-NEXT: sub a1, a1, t0 +; RV32ZBB-NEXT: sub a3, a3, a7 ; RV32ZBB-NEXT: sub a2, a2, a4 ; RV32ZBB-NEXT: bgez a1, .LBB32_4 ; RV32ZBB-NEXT: # %bb.3: @@ -2541,18 +2541,18 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: lw a4, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw t0, 12(a1) -; RV32I-NEXT: lw a7, 8(a2) -; RV32I-NEXT: lw t1, 12(a2) ; RV32I-NEXT: lw a5, 0(a2) ; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw t1, 12(a2) ; RV32I-NEXT: sltu a2, a7, a6 ; RV32I-NEXT: mv t4, a2 ; RV32I-NEXT: beq t0, t1, .LBB38_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: slt 
t4, t1, t0 ; RV32I-NEXT: .LBB38_2: -; RV32I-NEXT: sltu t2, a5, a3 ; RV32I-NEXT: sltu t5, a1, a4 +; RV32I-NEXT: sltu t2, a5, a3 ; RV32I-NEXT: mv t3, t2 ; RV32I-NEXT: beq a4, a1, .LBB38_4 ; RV32I-NEXT: # %bb.3: @@ -2637,18 +2637,18 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: lw a4, 4(a1) ; RV32ZBB-NEXT: lw a6, 8(a1) ; RV32ZBB-NEXT: lw t0, 12(a1) -; RV32ZBB-NEXT: lw a7, 8(a2) -; RV32ZBB-NEXT: lw t1, 12(a2) ; RV32ZBB-NEXT: lw a5, 0(a2) ; RV32ZBB-NEXT: lw a1, 4(a2) +; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw t1, 12(a2) ; RV32ZBB-NEXT: sltu a2, a7, a6 ; RV32ZBB-NEXT: mv t4, a2 ; RV32ZBB-NEXT: beq t0, t1, .LBB38_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: slt t4, t1, t0 ; RV32ZBB-NEXT: .LBB38_2: -; RV32ZBB-NEXT: sltu t2, a5, a3 ; RV32ZBB-NEXT: sltu t5, a1, a4 +; RV32ZBB-NEXT: sltu t2, a5, a3 ; RV32ZBB-NEXT: mv t3, t2 ; RV32ZBB-NEXT: beq a4, a1, .LBB38_4 ; RV32ZBB-NEXT: # %bb.3: diff --git a/llvm/test/CodeGen/RISCV/abdu-neg.ll b/llvm/test/CodeGen/RISCV/abdu-neg.ll index 9e41cde7ae181..a904def2753db 100644 --- a/llvm/test/CodeGen/RISCV/abdu-neg.ll +++ b/llvm/test/CodeGen/RISCV/abdu-neg.ll @@ -624,24 +624,24 @@ define i64 @abd_ext_i64_undef(i64 %a, i64 %b) nounwind { define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw a4, 0(a2) ; RV32I-NEXT: lw a7, 4(a2) ; RV32I-NEXT: lw a3, 8(a2) ; RV32I-NEXT: lw t1, 12(a2) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) ; RV32I-NEXT: lw a2, 0(a1) ; RV32I-NEXT: lw t0, 4(a1) -; RV32I-NEXT: sltu a1, a4, a3 +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: sltu a1, a5, a3 ; RV32I-NEXT: sub t1, a6, t1 -; RV32I-NEXT: sltu t2, a2, a5 ; RV32I-NEXT: sub a1, t1, a1 +; RV32I-NEXT: sltu t2, a2, a4 ; RV32I-NEXT: mv t1, t2 ; RV32I-NEXT: beq t0, a7, .LBB11_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu t1, t0, a7 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: sub a3, a5, a3 ; RV32I-NEXT: sltu t3, a3, t1 ; RV32I-NEXT: sub a1, a1, t3 ; RV32I-NEXT: sub a3, a3, t1 @@ -650,27 +650,27 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sltu t1, a6, a1 ; RV32I-NEXT: j .LBB11_5 ; RV32I-NEXT: .LBB11_4: -; RV32I-NEXT: sltu t1, a4, a3 +; RV32I-NEXT: sltu t1, a5, a3 ; RV32I-NEXT: .LBB11_5: ; RV32I-NEXT: sub a7, t0, a7 ; RV32I-NEXT: sub a7, a7, t2 -; RV32I-NEXT: sub a5, a2, a5 +; RV32I-NEXT: sub t2, a2, a4 ; RV32I-NEXT: beq a7, t0, .LBB11_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: sltu a2, t0, a7 ; RV32I-NEXT: j .LBB11_8 ; RV32I-NEXT: .LBB11_7: -; RV32I-NEXT: sltu a2, a2, a5 +; RV32I-NEXT: sltu a2, a2, t2 ; RV32I-NEXT: .LBB11_8: -; RV32I-NEXT: xor a6, a1, a6 -; RV32I-NEXT: xor a4, a3, a4 -; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: xor a4, a1, a6 +; RV32I-NEXT: xor a5, a3, a5 +; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: beqz a4, .LBB11_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a2, t1 ; RV32I-NEXT: .LBB11_10: ; RV32I-NEXT: neg a4, a2 -; RV32I-NEXT: xor t0, a5, a4 +; RV32I-NEXT: xor t0, t2, a4 ; RV32I-NEXT: xor t3, a7, a4 ; RV32I-NEXT: sltu a5, t0, a4 ; RV32I-NEXT: add a6, t3, a2 @@ -736,24 +736,24 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_ext_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a5, 0(a2) +; RV32ZBB-NEXT: lw a4, 0(a2) ; RV32ZBB-NEXT: lw a7, 4(a2) ; RV32ZBB-NEXT: lw a3, 8(a2) ; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: lw a6, 12(a1) ; RV32ZBB-NEXT: lw a2, 0(a1) ; RV32ZBB-NEXT: lw t0, 4(a1) -; RV32ZBB-NEXT: sltu a1, a4, 
a3 +; RV32ZBB-NEXT: lw a5, 8(a1) +; RV32ZBB-NEXT: lw a6, 12(a1) +; RV32ZBB-NEXT: sltu a1, a5, a3 ; RV32ZBB-NEXT: sub t1, a6, t1 -; RV32ZBB-NEXT: sltu t2, a2, a5 ; RV32ZBB-NEXT: sub a1, t1, a1 +; RV32ZBB-NEXT: sltu t2, a2, a4 ; RV32ZBB-NEXT: mv t1, t2 ; RV32ZBB-NEXT: beq t0, a7, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: sltu t1, t0, a7 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: sub a3, a5, a3 ; RV32ZBB-NEXT: sltu t3, a3, t1 ; RV32ZBB-NEXT: sub a1, a1, t3 ; RV32ZBB-NEXT: sub a3, a3, t1 @@ -762,27 +762,27 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sltu t1, a6, a1 ; RV32ZBB-NEXT: j .LBB11_5 ; RV32ZBB-NEXT: .LBB11_4: -; RV32ZBB-NEXT: sltu t1, a4, a3 +; RV32ZBB-NEXT: sltu t1, a5, a3 ; RV32ZBB-NEXT: .LBB11_5: ; RV32ZBB-NEXT: sub a7, t0, a7 ; RV32ZBB-NEXT: sub a7, a7, t2 -; RV32ZBB-NEXT: sub a5, a2, a5 +; RV32ZBB-NEXT: sub t2, a2, a4 ; RV32ZBB-NEXT: beq a7, t0, .LBB11_7 ; RV32ZBB-NEXT: # %bb.6: ; RV32ZBB-NEXT: sltu a2, t0, a7 ; RV32ZBB-NEXT: j .LBB11_8 ; RV32ZBB-NEXT: .LBB11_7: -; RV32ZBB-NEXT: sltu a2, a2, a5 +; RV32ZBB-NEXT: sltu a2, a2, t2 ; RV32ZBB-NEXT: .LBB11_8: -; RV32ZBB-NEXT: xor a6, a1, a6 -; RV32ZBB-NEXT: xor a4, a3, a4 -; RV32ZBB-NEXT: or a4, a4, a6 +; RV32ZBB-NEXT: xor a4, a1, a6 +; RV32ZBB-NEXT: xor a5, a3, a5 +; RV32ZBB-NEXT: or a4, a5, a4 ; RV32ZBB-NEXT: beqz a4, .LBB11_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a2, t1 ; RV32ZBB-NEXT: .LBB11_10: ; RV32ZBB-NEXT: neg a4, a2 -; RV32ZBB-NEXT: xor t0, a5, a4 +; RV32ZBB-NEXT: xor t0, t2, a4 ; RV32ZBB-NEXT: xor t3, a7, a4 ; RV32ZBB-NEXT: sltu a5, t0, a4 ; RV32ZBB-NEXT: add a6, t3, a2 @@ -857,24 +857,24 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128_undef: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw a4, 0(a2) ; RV32I-NEXT: lw a7, 4(a2) ; RV32I-NEXT: lw a3, 8(a2) ; RV32I-NEXT: lw t1, 12(a2) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) ; RV32I-NEXT: lw a2, 0(a1) ; RV32I-NEXT: lw t0, 4(a1) -; RV32I-NEXT: sltu a1, a4, a3 +; RV32I-NEXT: lw a5, 8(a1) +; RV32I-NEXT: lw a6, 12(a1) +; RV32I-NEXT: sltu a1, a5, a3 ; RV32I-NEXT: sub t1, a6, t1 -; RV32I-NEXT: sltu t2, a2, a5 ; RV32I-NEXT: sub a1, t1, a1 +; RV32I-NEXT: sltu t2, a2, a4 ; RV32I-NEXT: mv t1, t2 ; RV32I-NEXT: beq t0, a7, .LBB12_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sltu t1, t0, a7 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: sub a3, a4, a3 +; RV32I-NEXT: sub a3, a5, a3 ; RV32I-NEXT: sltu t3, a3, t1 ; RV32I-NEXT: sub a1, a1, t3 ; RV32I-NEXT: sub a3, a3, t1 @@ -883,27 +883,27 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: sltu t1, a6, a1 ; RV32I-NEXT: j .LBB12_5 ; RV32I-NEXT: .LBB12_4: -; RV32I-NEXT: sltu t1, a4, a3 +; RV32I-NEXT: sltu t1, a5, a3 ; RV32I-NEXT: .LBB12_5: ; RV32I-NEXT: sub a7, t0, a7 ; RV32I-NEXT: sub a7, a7, t2 -; RV32I-NEXT: sub a5, a2, a5 +; RV32I-NEXT: sub t2, a2, a4 ; RV32I-NEXT: beq a7, t0, .LBB12_7 ; RV32I-NEXT: # %bb.6: ; RV32I-NEXT: sltu a2, t0, a7 ; RV32I-NEXT: j .LBB12_8 ; RV32I-NEXT: .LBB12_7: -; RV32I-NEXT: sltu a2, a2, a5 +; RV32I-NEXT: sltu a2, a2, t2 ; RV32I-NEXT: .LBB12_8: -; RV32I-NEXT: xor a6, a1, a6 -; RV32I-NEXT: xor a4, a3, a4 -; RV32I-NEXT: or a4, a4, a6 +; RV32I-NEXT: xor a4, a1, a6 +; RV32I-NEXT: xor a5, a3, a5 +; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: beqz a4, .LBB12_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a2, t1 ; RV32I-NEXT: .LBB12_10: ; RV32I-NEXT: neg a4, a2 -; RV32I-NEXT: xor t0, a5, a4 +; RV32I-NEXT: xor t0, t2, 
a4 ; RV32I-NEXT: xor t3, a7, a4 ; RV32I-NEXT: sltu a5, t0, a4 ; RV32I-NEXT: add a6, t3, a2 @@ -969,24 +969,24 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_ext_i128_undef: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a5, 0(a2) +; RV32ZBB-NEXT: lw a4, 0(a2) ; RV32ZBB-NEXT: lw a7, 4(a2) ; RV32ZBB-NEXT: lw a3, 8(a2) ; RV32ZBB-NEXT: lw t1, 12(a2) -; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: lw a6, 12(a1) ; RV32ZBB-NEXT: lw a2, 0(a1) ; RV32ZBB-NEXT: lw t0, 4(a1) -; RV32ZBB-NEXT: sltu a1, a4, a3 +; RV32ZBB-NEXT: lw a5, 8(a1) +; RV32ZBB-NEXT: lw a6, 12(a1) +; RV32ZBB-NEXT: sltu a1, a5, a3 ; RV32ZBB-NEXT: sub t1, a6, t1 -; RV32ZBB-NEXT: sltu t2, a2, a5 ; RV32ZBB-NEXT: sub a1, t1, a1 +; RV32ZBB-NEXT: sltu t2, a2, a4 ; RV32ZBB-NEXT: mv t1, t2 ; RV32ZBB-NEXT: beq t0, a7, .LBB12_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: sltu t1, t0, a7 ; RV32ZBB-NEXT: .LBB12_2: -; RV32ZBB-NEXT: sub a3, a4, a3 +; RV32ZBB-NEXT: sub a3, a5, a3 ; RV32ZBB-NEXT: sltu t3, a3, t1 ; RV32ZBB-NEXT: sub a1, a1, t3 ; RV32ZBB-NEXT: sub a3, a3, t1 @@ -995,27 +995,27 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: sltu t1, a6, a1 ; RV32ZBB-NEXT: j .LBB12_5 ; RV32ZBB-NEXT: .LBB12_4: -; RV32ZBB-NEXT: sltu t1, a4, a3 +; RV32ZBB-NEXT: sltu t1, a5, a3 ; RV32ZBB-NEXT: .LBB12_5: ; RV32ZBB-NEXT: sub a7, t0, a7 ; RV32ZBB-NEXT: sub a7, a7, t2 -; RV32ZBB-NEXT: sub a5, a2, a5 +; RV32ZBB-NEXT: sub t2, a2, a4 ; RV32ZBB-NEXT: beq a7, t0, .LBB12_7 ; RV32ZBB-NEXT: # %bb.6: ; RV32ZBB-NEXT: sltu a2, t0, a7 ; RV32ZBB-NEXT: j .LBB12_8 ; RV32ZBB-NEXT: .LBB12_7: -; RV32ZBB-NEXT: sltu a2, a2, a5 +; RV32ZBB-NEXT: sltu a2, a2, t2 ; RV32ZBB-NEXT: .LBB12_8: -; RV32ZBB-NEXT: xor a6, a1, a6 -; RV32ZBB-NEXT: xor a4, a3, a4 -; RV32ZBB-NEXT: or a4, a4, a6 +; RV32ZBB-NEXT: xor a4, a1, a6 +; RV32ZBB-NEXT: xor a5, a3, a5 +; RV32ZBB-NEXT: or a4, a5, a4 ; RV32ZBB-NEXT: beqz a4, .LBB12_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a2, t1 ; RV32ZBB-NEXT: .LBB12_10: ; RV32ZBB-NEXT: neg a4, a2 -; RV32ZBB-NEXT: xor t0, a5, a4 +; RV32ZBB-NEXT: xor t0, t2, a4 ; RV32ZBB-NEXT: xor t3, a7, a4 ; RV32ZBB-NEXT: sltu a5, t0, a4 ; RV32ZBB-NEXT: add a6, t3, a2 @@ -1335,30 +1335,30 @@ define i64 @abd_minmax_i64(i64 %a, i64 %b) nounwind { define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_minmax_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a6, 4(a2) -; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw a5, 4(a2) +; RV32I-NEXT: lw a6, 8(a2) ; RV32I-NEXT: lw t0, 12(a2) -; RV32I-NEXT: lw a5, 12(a1) ; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: beq a5, t0, .LBB17_2 +; RV32I-NEXT: lw a7, 12(a1) +; RV32I-NEXT: beq a7, t0, .LBB17_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a5, t0 +; RV32I-NEXT: sltu t1, a7, t0 ; RV32I-NEXT: j .LBB17_3 ; RV32I-NEXT: .LBB17_2: -; RV32I-NEXT: sltu t1, a4, a7 +; RV32I-NEXT: sltu t1, a4, a6 ; RV32I-NEXT: .LBB17_3: ; RV32I-NEXT: lw t2, 0(a2) ; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: beq a3, a6, .LBB17_5 +; RV32I-NEXT: beq a3, a5, .LBB17_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sltu t6, a3, a6 +; RV32I-NEXT: sltu t6, a3, a5 ; RV32I-NEXT: j .LBB17_6 ; RV32I-NEXT: .LBB17_5: ; RV32I-NEXT: sltu t6, a1, t2 ; RV32I-NEXT: .LBB17_6: -; RV32I-NEXT: xor a2, a5, t0 -; RV32I-NEXT: xor t3, a4, a7 +; RV32I-NEXT: xor a2, a7, t0 +; RV32I-NEXT: xor t3, a4, a6 ; RV32I-NEXT: or t5, t3, a2 ; RV32I-NEXT: beqz t5, .LBB17_8 ; RV32I-NEXT: # %bb.7: @@ -1366,27 +1366,27 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: .LBB17_8: ; RV32I-NEXT: mv a2, a1 ; 
RV32I-NEXT: mv t1, a3 -; RV32I-NEXT: mv t4, a5 +; RV32I-NEXT: mv t4, a7 ; RV32I-NEXT: mv t3, a4 ; RV32I-NEXT: bnez t6, .LBB17_10 ; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: mv a2, t2 -; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: mv t1, a5 ; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: mv t3, a7 +; RV32I-NEXT: mv t3, a6 ; RV32I-NEXT: .LBB17_10: -; RV32I-NEXT: beq a5, t0, .LBB17_12 +; RV32I-NEXT: beq a7, t0, .LBB17_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu t6, t0, a5 +; RV32I-NEXT: sltu t6, t0, a7 ; RV32I-NEXT: j .LBB17_13 ; RV32I-NEXT: .LBB17_12: -; RV32I-NEXT: sltu t6, a7, a4 +; RV32I-NEXT: sltu t6, a6, a4 ; RV32I-NEXT: .LBB17_13: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: beq a3, a6, .LBB17_15 +; RV32I-NEXT: beq a3, a5, .LBB17_15 ; RV32I-NEXT: # %bb.14: -; RV32I-NEXT: sltu s0, a6, a3 +; RV32I-NEXT: sltu s0, a5, a3 ; RV32I-NEXT: bnez t5, .LBB17_16 ; RV32I-NEXT: j .LBB17_17 ; RV32I-NEXT: .LBB17_15: @@ -1398,14 +1398,14 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: bnez s0, .LBB17_19 ; RV32I-NEXT: # %bb.18: ; RV32I-NEXT: mv a1, t2 -; RV32I-NEXT: mv a3, a6 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a3, a5 +; RV32I-NEXT: mv a7, t0 +; RV32I-NEXT: mv a4, a6 ; RV32I-NEXT: .LBB17_19: -; RV32I-NEXT: sltu a7, t3, a4 -; RV32I-NEXT: sub a5, t4, a5 +; RV32I-NEXT: sltu a5, t3, a4 +; RV32I-NEXT: sub a6, t4, a7 +; RV32I-NEXT: sub a5, a6, a5 ; RV32I-NEXT: sltu a6, a2, a1 -; RV32I-NEXT: sub a5, a5, a7 ; RV32I-NEXT: mv a7, a6 ; RV32I-NEXT: beq t1, a3, .LBB17_21 ; RV32I-NEXT: # %bb.20: @@ -1462,30 +1462,30 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; ; RV32ZBB-LABEL: abd_minmax_i128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a6, 4(a2) -; RV32ZBB-NEXT: lw a7, 8(a2) +; RV32ZBB-NEXT: lw a5, 4(a2) +; RV32ZBB-NEXT: lw a6, 8(a2) ; RV32ZBB-NEXT: lw t0, 12(a2) -; RV32ZBB-NEXT: lw a5, 12(a1) ; RV32ZBB-NEXT: lw a3, 4(a1) ; RV32ZBB-NEXT: lw a4, 8(a1) -; RV32ZBB-NEXT: beq a5, t0, .LBB17_2 +; RV32ZBB-NEXT: lw a7, 12(a1) +; RV32ZBB-NEXT: beq a7, t0, .LBB17_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a5, t0 +; RV32ZBB-NEXT: sltu t1, a7, t0 ; RV32ZBB-NEXT: j .LBB17_3 ; RV32ZBB-NEXT: .LBB17_2: -; RV32ZBB-NEXT: sltu t1, a4, a7 +; RV32ZBB-NEXT: sltu t1, a4, a6 ; RV32ZBB-NEXT: .LBB17_3: ; RV32ZBB-NEXT: lw t2, 0(a2) ; RV32ZBB-NEXT: lw a1, 0(a1) -; RV32ZBB-NEXT: beq a3, a6, .LBB17_5 +; RV32ZBB-NEXT: beq a3, a5, .LBB17_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: sltu t6, a3, a6 +; RV32ZBB-NEXT: sltu t6, a3, a5 ; RV32ZBB-NEXT: j .LBB17_6 ; RV32ZBB-NEXT: .LBB17_5: ; RV32ZBB-NEXT: sltu t6, a1, t2 ; RV32ZBB-NEXT: .LBB17_6: -; RV32ZBB-NEXT: xor a2, a5, t0 -; RV32ZBB-NEXT: xor t3, a4, a7 +; RV32ZBB-NEXT: xor a2, a7, t0 +; RV32ZBB-NEXT: xor t3, a4, a6 ; RV32ZBB-NEXT: or t5, t3, a2 ; RV32ZBB-NEXT: beqz t5, .LBB17_8 ; RV32ZBB-NEXT: # %bb.7: @@ -1493,27 +1493,27 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: .LBB17_8: ; RV32ZBB-NEXT: mv a2, a1 ; RV32ZBB-NEXT: mv t1, a3 -; RV32ZBB-NEXT: mv t4, a5 +; RV32ZBB-NEXT: mv t4, a7 ; RV32ZBB-NEXT: mv t3, a4 ; RV32ZBB-NEXT: bnez t6, .LBB17_10 ; RV32ZBB-NEXT: # %bb.9: ; RV32ZBB-NEXT: mv a2, t2 -; RV32ZBB-NEXT: mv t1, a6 +; RV32ZBB-NEXT: mv t1, a5 ; RV32ZBB-NEXT: mv t4, t0 -; RV32ZBB-NEXT: mv t3, a7 +; RV32ZBB-NEXT: mv t3, a6 ; RV32ZBB-NEXT: .LBB17_10: -; RV32ZBB-NEXT: beq a5, t0, .LBB17_12 +; RV32ZBB-NEXT: beq a7, t0, .LBB17_12 ; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu t6, t0, a5 +; RV32ZBB-NEXT: sltu t6, t0, a7 ; RV32ZBB-NEXT: j .LBB17_13 ; 
RV32ZBB-NEXT: .LBB17_12: -; RV32ZBB-NEXT: sltu t6, a7, a4 +; RV32ZBB-NEXT: sltu t6, a6, a4 ; RV32ZBB-NEXT: .LBB17_13: ; RV32ZBB-NEXT: addi sp, sp, -16 ; RV32ZBB-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32ZBB-NEXT: beq a3, a6, .LBB17_15 +; RV32ZBB-NEXT: beq a3, a5, .LBB17_15 ; RV32ZBB-NEXT: # %bb.14: -; RV32ZBB-NEXT: sltu s0, a6, a3 +; RV32ZBB-NEXT: sltu s0, a5, a3 ; RV32ZBB-NEXT: bnez t5, .LBB17_16 ; RV32ZBB-NEXT: j .LBB17_17 ; RV32ZBB-NEXT: .LBB17_15: @@ -1525,14 +1525,14 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: bnez s0, .LBB17_19 ; RV32ZBB-NEXT: # %bb.18: ; RV32ZBB-NEXT: mv a1, t2 -; RV32ZBB-NEXT: mv a3, a6 -; RV32ZBB-NEXT: mv a5, t0 -; RV32ZBB-NEXT: mv a4, a7 +; RV32ZBB-NEXT: mv a3, a5 +; RV32ZBB-NEXT: mv a7, t0 +; RV32ZBB-NEXT: mv a4, a6 ; RV32ZBB-NEXT: .LBB17_19: -; RV32ZBB-NEXT: sltu a7, t3, a4 -; RV32ZBB-NEXT: sub a5, t4, a5 +; RV32ZBB-NEXT: sltu a5, t3, a4 +; RV32ZBB-NEXT: sub a6, t4, a7 +; RV32ZBB-NEXT: sub a5, a6, a5 ; RV32ZBB-NEXT: sltu a6, a2, a1 -; RV32ZBB-NEXT: sub a5, a5, a7 ; RV32ZBB-NEXT: mv a7, a6 ; RV32ZBB-NEXT: beq t1, a3, .LBB17_21 ; RV32ZBB-NEXT: # %bb.20: @@ -1799,26 +1799,26 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) ; RV32I-NEXT: lw a4, 4(a2) -; RV32I-NEXT: lw a5, 8(a2) -; RV32I-NEXT: lw a7, 12(a2) -; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw t0, 12(a2) ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a6, a5 -; RV32I-NEXT: mv t4, t1 -; RV32I-NEXT: beq t0, a7, .LBB22_2 +; RV32I-NEXT: lw a5, 4(a1) +; RV32I-NEXT: lw a7, 8(a1) +; RV32I-NEXT: lw t1, 12(a1) +; RV32I-NEXT: sltu a1, a7, a6 +; RV32I-NEXT: mv t4, a1 +; RV32I-NEXT: beq t1, t0, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t4, t0, a7 +; RV32I-NEXT: sltu t4, t1, t0 ; RV32I-NEXT: .LBB22_2: ; RV32I-NEXT: sltu t2, a2, a3 ; RV32I-NEXT: mv t3, t2 -; RV32I-NEXT: beq a1, a4, .LBB22_4 +; RV32I-NEXT: beq a5, a4, .LBB22_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t3, a1, a4 +; RV32I-NEXT: sltu t3, a5, a4 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: xor t5, t0, a7 -; RV32I-NEXT: xor t6, a6, a5 +; RV32I-NEXT: xor t5, t1, t0 +; RV32I-NEXT: xor t6, a7, a6 ; RV32I-NEXT: or t5, t6, t5 ; RV32I-NEXT: mv t6, t3 ; RV32I-NEXT: beqz t5, .LBB22_6 @@ -1827,32 +1827,32 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: .LBB22_6: ; RV32I-NEXT: sltu t4, a3, a2 ; RV32I-NEXT: mv t5, t4 -; RV32I-NEXT: beq a1, a4, .LBB22_8 +; RV32I-NEXT: beq a5, a4, .LBB22_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: sltu t5, a4, a1 +; RV32I-NEXT: sltu t5, a4, a5 ; RV32I-NEXT: .LBB22_8: ; RV32I-NEXT: bnez t6, .LBB22_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu t1, a5, a6 -; RV32I-NEXT: sub a7, a7, t0 -; RV32I-NEXT: sub a5, a5, a6 -; RV32I-NEXT: sub a4, a4, a1 -; RV32I-NEXT: sub a6, a7, t1 -; RV32I-NEXT: sltu a7, a5, t5 -; RV32I-NEXT: sub a1, a5, t5 +; RV32I-NEXT: sltu a1, a6, a7 +; RV32I-NEXT: sub t0, t0, t1 +; RV32I-NEXT: sub a6, a6, a7 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: sub a7, t0, a1 +; RV32I-NEXT: sltu t0, a6, t5 +; RV32I-NEXT: sub a1, a6, t5 ; RV32I-NEXT: sub a5, a4, t4 -; RV32I-NEXT: sub a4, a6, a7 +; RV32I-NEXT: sub a4, a7, t0 ; RV32I-NEXT: sub a2, a3, a2 ; RV32I-NEXT: j .LBB22_11 ; RV32I-NEXT: .LBB22_10: -; RV32I-NEXT: sub a7, t0, a7 -; RV32I-NEXT: sub a5, a6, a5 -; RV32I-NEXT: sub a4, a1, a4 -; RV32I-NEXT: sub a6, a7, t1 -; RV32I-NEXT: sltu a7, a5, t3 -; RV32I-NEXT: sub a1, a5, t3 -; RV32I-NEXT: sub a5, a4, t2 -; RV32I-NEXT: sub a4, 
a6, a7 +; RV32I-NEXT: sub t0, t1, t0 +; RV32I-NEXT: sub a6, a7, a6 +; RV32I-NEXT: sub a5, a5, a4 +; RV32I-NEXT: sub a4, t0, a1 +; RV32I-NEXT: sltu a7, a6, t3 +; RV32I-NEXT: sub a1, a6, t3 +; RV32I-NEXT: sub a5, a5, t2 +; RV32I-NEXT: sub a4, a4, a7 ; RV32I-NEXT: sub a2, a2, a3 ; RV32I-NEXT: .LBB22_11: ; RV32I-NEXT: sw a2, 0(a0) @@ -1886,26 +1886,26 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) ; RV32ZBB-NEXT: lw a4, 4(a2) -; RV32ZBB-NEXT: lw a5, 8(a2) -; RV32ZBB-NEXT: lw a7, 12(a2) -; RV32ZBB-NEXT: lw a6, 8(a1) -; RV32ZBB-NEXT: lw t0, 12(a1) +; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw t0, 12(a2) ; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a6, a5 -; RV32ZBB-NEXT: mv t4, t1 -; RV32ZBB-NEXT: beq t0, a7, .LBB22_2 +; RV32ZBB-NEXT: lw a5, 4(a1) +; RV32ZBB-NEXT: lw a7, 8(a1) +; RV32ZBB-NEXT: lw t1, 12(a1) +; RV32ZBB-NEXT: sltu a1, a7, a6 +; RV32ZBB-NEXT: mv t4, a1 +; RV32ZBB-NEXT: beq t1, t0, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t4, t0, a7 +; RV32ZBB-NEXT: sltu t4, t1, t0 ; RV32ZBB-NEXT: .LBB22_2: ; RV32ZBB-NEXT: sltu t2, a2, a3 ; RV32ZBB-NEXT: mv t3, t2 -; RV32ZBB-NEXT: beq a1, a4, .LBB22_4 +; RV32ZBB-NEXT: beq a5, a4, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t3, a1, a4 +; RV32ZBB-NEXT: sltu t3, a5, a4 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: xor t5, t0, a7 -; RV32ZBB-NEXT: xor t6, a6, a5 +; RV32ZBB-NEXT: xor t5, t1, t0 +; RV32ZBB-NEXT: xor t6, a7, a6 ; RV32ZBB-NEXT: or t5, t6, t5 ; RV32ZBB-NEXT: mv t6, t3 ; RV32ZBB-NEXT: beqz t5, .LBB22_6 @@ -1914,32 +1914,32 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-NEXT: .LBB22_6: ; RV32ZBB-NEXT: sltu t4, a3, a2 ; RV32ZBB-NEXT: mv t5, t4 -; RV32ZBB-NEXT: beq a1, a4, .LBB22_8 +; RV32ZBB-NEXT: beq a5, a4, .LBB22_8 ; RV32ZBB-NEXT: # %bb.7: -; RV32ZBB-NEXT: sltu t5, a4, a1 +; RV32ZBB-NEXT: sltu t5, a4, a5 ; RV32ZBB-NEXT: .LBB22_8: ; RV32ZBB-NEXT: bnez t6, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: sltu t1, a5, a6 -; RV32ZBB-NEXT: sub a7, a7, t0 -; RV32ZBB-NEXT: sub a5, a5, a6 -; RV32ZBB-NEXT: sub a4, a4, a1 -; RV32ZBB-NEXT: sub a6, a7, t1 -; RV32ZBB-NEXT: sltu a7, a5, t5 -; RV32ZBB-NEXT: sub a1, a5, t5 +; RV32ZBB-NEXT: sltu a1, a6, a7 +; RV32ZBB-NEXT: sub t0, t0, t1 +; RV32ZBB-NEXT: sub a6, a6, a7 +; RV32ZBB-NEXT: sub a4, a4, a5 +; RV32ZBB-NEXT: sub a7, t0, a1 +; RV32ZBB-NEXT: sltu t0, a6, t5 +; RV32ZBB-NEXT: sub a1, a6, t5 ; RV32ZBB-NEXT: sub a5, a4, t4 -; RV32ZBB-NEXT: sub a4, a6, a7 +; RV32ZBB-NEXT: sub a4, a7, t0 ; RV32ZBB-NEXT: sub a2, a3, a2 ; RV32ZBB-NEXT: j .LBB22_11 ; RV32ZBB-NEXT: .LBB22_10: -; RV32ZBB-NEXT: sub a7, t0, a7 -; RV32ZBB-NEXT: sub a5, a6, a5 -; RV32ZBB-NEXT: sub a4, a1, a4 -; RV32ZBB-NEXT: sub a6, a7, t1 -; RV32ZBB-NEXT: sltu a7, a5, t3 -; RV32ZBB-NEXT: sub a1, a5, t3 -; RV32ZBB-NEXT: sub a5, a4, t2 -; RV32ZBB-NEXT: sub a4, a6, a7 +; RV32ZBB-NEXT: sub t0, t1, t0 +; RV32ZBB-NEXT: sub a6, a7, a6 +; RV32ZBB-NEXT: sub a5, a5, a4 +; RV32ZBB-NEXT: sub a4, t0, a1 +; RV32ZBB-NEXT: sltu a7, a6, t3 +; RV32ZBB-NEXT: sub a1, a6, t3 +; RV32ZBB-NEXT: sub a5, a5, t2 +; RV32ZBB-NEXT: sub a4, a4, a7 ; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: .LBB22_11: ; RV32ZBB-NEXT: sw a2, 0(a0) diff --git a/llvm/test/CodeGen/RISCV/abdu.ll b/llvm/test/CodeGen/RISCV/abdu.ll index 7c8638cb461e2..899c12a2e128d 100644 --- a/llvm/test/CodeGen/RISCV/abdu.ll +++ b/llvm/test/CodeGen/RISCV/abdu.ll @@ -541,75 +541,75 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128: 
; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a6, 4(a2) +; RV32I-NEXT: lw a5, 8(a2) ; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw t0, 0(a1) +; RV32I-NEXT: lw t1, 4(a1) ; RV32I-NEXT: lw a2, 8(a1) ; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: lw t0, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a2, a6 +; RV32I-NEXT: sltu a1, a2, a5 ; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t2, t0, a3 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, a5, .LBB11_2 +; RV32I-NEXT: sub a7, a7, a1 +; RV32I-NEXT: sltu a1, t0, a3 +; RV32I-NEXT: mv t2, a1 +; RV32I-NEXT: beq t1, a6, .LBB11_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a5 +; RV32I-NEXT: sltu t2, t1, a6 ; RV32I-NEXT: .LBB11_2: -; RV32I-NEXT: sub t3, a2, a6 -; RV32I-NEXT: sltu a6, t3, t1 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a7, t3, t1 -; RV32I-NEXT: beq a6, a4, .LBB11_4 +; RV32I-NEXT: sub t3, a2, a5 +; RV32I-NEXT: sltu a5, t3, t2 +; RV32I-NEXT: sub a5, a7, a5 +; RV32I-NEXT: sub a7, t3, t2 +; RV32I-NEXT: beq a5, a4, .LBB11_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a4, a6 +; RV32I-NEXT: sltu t2, a4, a5 ; RV32I-NEXT: j .LBB11_5 ; RV32I-NEXT: .LBB11_4: -; RV32I-NEXT: sltu t1, a2, a7 +; RV32I-NEXT: sltu t2, a2, a7 ; RV32I-NEXT: .LBB11_5: -; RV32I-NEXT: sub a5, a1, a5 -; RV32I-NEXT: sub a5, a5, t2 +; RV32I-NEXT: sub a6, t1, a6 +; RV32I-NEXT: sub a6, a6, a1 ; RV32I-NEXT: sub a3, t0, a3 -; RV32I-NEXT: beq a5, a1, .LBB11_7 +; RV32I-NEXT: beq a6, t1, .LBB11_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, a5 +; RV32I-NEXT: sltu a1, t1, a6 ; RV32I-NEXT: j .LBB11_8 ; RV32I-NEXT: .LBB11_7: ; RV32I-NEXT: sltu a1, t0, a3 ; RV32I-NEXT: .LBB11_8: -; RV32I-NEXT: xor a4, a6, a4 +; RV32I-NEXT: xor a4, a5, a4 ; RV32I-NEXT: xor a2, a7, a2 ; RV32I-NEXT: or a2, a2, a4 ; RV32I-NEXT: beqz a2, .LBB11_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a1, t1 +; RV32I-NEXT: mv a1, t2 ; RV32I-NEXT: .LBB11_10: ; RV32I-NEXT: neg t0, a1 -; RV32I-NEXT: xor a2, a7, t0 -; RV32I-NEXT: xor a6, a6, t0 -; RV32I-NEXT: xor a4, a3, t0 -; RV32I-NEXT: sltu a3, a2, t0 -; RV32I-NEXT: add a7, a6, a1 -; RV32I-NEXT: sltu a6, a4, t0 -; RV32I-NEXT: sub a3, a7, a3 -; RV32I-NEXT: xor t1, a5, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a5, .LBB11_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu a7, t1, t0 -; RV32I-NEXT: .LBB11_12: +; RV32I-NEXT: xor a4, a7, t0 +; RV32I-NEXT: xor a2, a5, t0 +; RV32I-NEXT: xor a5, a6, t0 +; RV32I-NEXT: xor a3, a3, t0 +; RV32I-NEXT: sltu a7, a4, t0 ; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: add t1, t1, a1 -; RV32I-NEXT: add a1, a4, a1 -; RV32I-NEXT: sltu a4, a2, a7 ; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a5, t1, a6 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sltu a7, a3, t0 +; RV32I-NEXT: mv t1, a7 +; RV32I-NEXT: beqz a6, .LBB11_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sltu t1, a5, t0 +; RV32I-NEXT: .LBB11_12: +; RV32I-NEXT: add a4, a4, a1 +; RV32I-NEXT: add a5, a5, a1 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: sltu a3, a4, t1 +; RV32I-NEXT: sub a4, a4, t1 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a2, a2, a3 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_ext_i128: @@ -637,75 +637,75 @@ define i128 @abd_ext_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_ext_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a5, 4(a2) -; 
RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) +; RV32ZBB-NEXT: lw a5, 8(a2) ; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw t0, 0(a1) +; RV32ZBB-NEXT: lw t1, 4(a1) ; RV32ZBB-NEXT: lw a2, 8(a1) ; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: lw t0, 0(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a2, a6 +; RV32ZBB-NEXT: sltu a1, a2, a5 ; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t2, t0, a3 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB11_2 +; RV32ZBB-NEXT: sub a7, a7, a1 +; RV32ZBB-NEXT: sltu a1, t0, a3 +; RV32ZBB-NEXT: mv t2, a1 +; RV32ZBB-NEXT: beq t1, a6, .LBB11_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a5 +; RV32ZBB-NEXT: sltu t2, t1, a6 ; RV32ZBB-NEXT: .LBB11_2: -; RV32ZBB-NEXT: sub t3, a2, a6 -; RV32ZBB-NEXT: sltu a6, t3, t1 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a7, t3, t1 -; RV32ZBB-NEXT: beq a6, a4, .LBB11_4 +; RV32ZBB-NEXT: sub t3, a2, a5 +; RV32ZBB-NEXT: sltu a5, t3, t2 +; RV32ZBB-NEXT: sub a5, a7, a5 +; RV32ZBB-NEXT: sub a7, t3, t2 +; RV32ZBB-NEXT: beq a5, a4, .LBB11_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a4, a6 +; RV32ZBB-NEXT: sltu t2, a4, a5 ; RV32ZBB-NEXT: j .LBB11_5 ; RV32ZBB-NEXT: .LBB11_4: -; RV32ZBB-NEXT: sltu t1, a2, a7 +; RV32ZBB-NEXT: sltu t2, a2, a7 ; RV32ZBB-NEXT: .LBB11_5: -; RV32ZBB-NEXT: sub a5, a1, a5 -; RV32ZBB-NEXT: sub a5, a5, t2 +; RV32ZBB-NEXT: sub a6, t1, a6 +; RV32ZBB-NEXT: sub a6, a6, a1 ; RV32ZBB-NEXT: sub a3, t0, a3 -; RV32ZBB-NEXT: beq a5, a1, .LBB11_7 +; RV32ZBB-NEXT: beq a6, t1, .LBB11_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, a5 +; RV32ZBB-NEXT: sltu a1, t1, a6 ; RV32ZBB-NEXT: j .LBB11_8 ; RV32ZBB-NEXT: .LBB11_7: ; RV32ZBB-NEXT: sltu a1, t0, a3 ; RV32ZBB-NEXT: .LBB11_8: -; RV32ZBB-NEXT: xor a4, a6, a4 +; RV32ZBB-NEXT: xor a4, a5, a4 ; RV32ZBB-NEXT: xor a2, a7, a2 ; RV32ZBB-NEXT: or a2, a2, a4 ; RV32ZBB-NEXT: beqz a2, .LBB11_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: mv a1, t1 +; RV32ZBB-NEXT: mv a1, t2 ; RV32ZBB-NEXT: .LBB11_10: ; RV32ZBB-NEXT: neg t0, a1 -; RV32ZBB-NEXT: xor a2, a7, t0 -; RV32ZBB-NEXT: xor a6, a6, t0 -; RV32ZBB-NEXT: xor a4, a3, t0 -; RV32ZBB-NEXT: sltu a3, a2, t0 -; RV32ZBB-NEXT: add a7, a6, a1 -; RV32ZBB-NEXT: sltu a6, a4, t0 -; RV32ZBB-NEXT: sub a3, a7, a3 -; RV32ZBB-NEXT: xor t1, a5, t0 -; RV32ZBB-NEXT: mv a7, a6 -; RV32ZBB-NEXT: beqz a5, .LBB11_12 -; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu a7, t1, t0 -; RV32ZBB-NEXT: .LBB11_12: +; RV32ZBB-NEXT: xor a4, a7, t0 +; RV32ZBB-NEXT: xor a2, a5, t0 +; RV32ZBB-NEXT: xor a5, a6, t0 +; RV32ZBB-NEXT: xor a3, a3, t0 +; RV32ZBB-NEXT: sltu a7, a4, t0 ; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: add t1, t1, a1 -; RV32ZBB-NEXT: add a1, a4, a1 -; RV32ZBB-NEXT: sltu a4, a2, a7 ; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a5, t1, a6 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sltu a7, a3, t0 +; RV32ZBB-NEXT: mv t1, a7 +; RV32ZBB-NEXT: beqz a6, .LBB11_12 +; RV32ZBB-NEXT: # %bb.11: +; RV32ZBB-NEXT: sltu t1, a5, t0 +; RV32ZBB-NEXT: .LBB11_12: +; RV32ZBB-NEXT: add a4, a4, a1 +; RV32ZBB-NEXT: add a5, a5, a1 +; RV32ZBB-NEXT: add a1, a3, a1 +; RV32ZBB-NEXT: sltu a3, a4, t1 +; RV32ZBB-NEXT: sub a4, a4, t1 +; RV32ZBB-NEXT: sub a5, a5, a7 +; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_ext_i128: @@ -741,75 +741,75 @@ define i128 
@abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_ext_i128_undef: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a6, 4(a2) +; RV32I-NEXT: lw a5, 8(a2) ; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw t0, 0(a1) +; RV32I-NEXT: lw t1, 4(a1) ; RV32I-NEXT: lw a2, 8(a1) ; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: lw t0, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a2, a6 +; RV32I-NEXT: sltu a1, a2, a5 ; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t2, t0, a3 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, a5, .LBB12_2 +; RV32I-NEXT: sub a7, a7, a1 +; RV32I-NEXT: sltu a1, t0, a3 +; RV32I-NEXT: mv t2, a1 +; RV32I-NEXT: beq t1, a6, .LBB12_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a5 +; RV32I-NEXT: sltu t2, t1, a6 ; RV32I-NEXT: .LBB12_2: -; RV32I-NEXT: sub t3, a2, a6 -; RV32I-NEXT: sltu a6, t3, t1 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a7, t3, t1 -; RV32I-NEXT: beq a6, a4, .LBB12_4 +; RV32I-NEXT: sub t3, a2, a5 +; RV32I-NEXT: sltu a5, t3, t2 +; RV32I-NEXT: sub a5, a7, a5 +; RV32I-NEXT: sub a7, t3, t2 +; RV32I-NEXT: beq a5, a4, .LBB12_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a4, a6 +; RV32I-NEXT: sltu t2, a4, a5 ; RV32I-NEXT: j .LBB12_5 ; RV32I-NEXT: .LBB12_4: -; RV32I-NEXT: sltu t1, a2, a7 +; RV32I-NEXT: sltu t2, a2, a7 ; RV32I-NEXT: .LBB12_5: -; RV32I-NEXT: sub a5, a1, a5 -; RV32I-NEXT: sub a5, a5, t2 +; RV32I-NEXT: sub a6, t1, a6 +; RV32I-NEXT: sub a6, a6, a1 ; RV32I-NEXT: sub a3, t0, a3 -; RV32I-NEXT: beq a5, a1, .LBB12_7 +; RV32I-NEXT: beq a6, t1, .LBB12_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, a5 +; RV32I-NEXT: sltu a1, t1, a6 ; RV32I-NEXT: j .LBB12_8 ; RV32I-NEXT: .LBB12_7: ; RV32I-NEXT: sltu a1, t0, a3 ; RV32I-NEXT: .LBB12_8: -; RV32I-NEXT: xor a4, a6, a4 +; RV32I-NEXT: xor a4, a5, a4 ; RV32I-NEXT: xor a2, a7, a2 ; RV32I-NEXT: or a2, a2, a4 ; RV32I-NEXT: beqz a2, .LBB12_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a1, t1 +; RV32I-NEXT: mv a1, t2 ; RV32I-NEXT: .LBB12_10: ; RV32I-NEXT: neg t0, a1 -; RV32I-NEXT: xor a2, a7, t0 -; RV32I-NEXT: xor a6, a6, t0 -; RV32I-NEXT: xor a4, a3, t0 -; RV32I-NEXT: sltu a3, a2, t0 -; RV32I-NEXT: add a7, a6, a1 -; RV32I-NEXT: sltu a6, a4, t0 -; RV32I-NEXT: sub a3, a7, a3 -; RV32I-NEXT: xor t1, a5, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a5, .LBB12_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu a7, t1, t0 -; RV32I-NEXT: .LBB12_12: +; RV32I-NEXT: xor a4, a7, t0 +; RV32I-NEXT: xor a2, a5, t0 +; RV32I-NEXT: xor a5, a6, t0 +; RV32I-NEXT: xor a3, a3, t0 +; RV32I-NEXT: sltu a7, a4, t0 ; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: add t1, t1, a1 -; RV32I-NEXT: add a1, a4, a1 -; RV32I-NEXT: sltu a4, a2, a7 ; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a5, t1, a6 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sltu a7, a3, t0 +; RV32I-NEXT: mv t1, a7 +; RV32I-NEXT: beqz a6, .LBB12_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sltu t1, a5, t0 +; RV32I-NEXT: .LBB12_12: +; RV32I-NEXT: add a4, a4, a1 +; RV32I-NEXT: add a5, a5, a1 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: sltu a3, a4, t1 +; RV32I-NEXT: sub a4, a4, t1 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a2, a2, a3 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_ext_i128_undef: @@ -837,75 +837,75 @@ define i128 @abd_ext_i128_undef(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: 
abd_ext_i128_undef: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) +; RV32ZBB-NEXT: lw a5, 8(a2) ; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw t0, 0(a1) +; RV32ZBB-NEXT: lw t1, 4(a1) ; RV32ZBB-NEXT: lw a2, 8(a1) ; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: lw t0, 0(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a2, a6 +; RV32ZBB-NEXT: sltu a1, a2, a5 ; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t2, t0, a3 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB12_2 +; RV32ZBB-NEXT: sub a7, a7, a1 +; RV32ZBB-NEXT: sltu a1, t0, a3 +; RV32ZBB-NEXT: mv t2, a1 +; RV32ZBB-NEXT: beq t1, a6, .LBB12_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a5 +; RV32ZBB-NEXT: sltu t2, t1, a6 ; RV32ZBB-NEXT: .LBB12_2: -; RV32ZBB-NEXT: sub t3, a2, a6 -; RV32ZBB-NEXT: sltu a6, t3, t1 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a7, t3, t1 -; RV32ZBB-NEXT: beq a6, a4, .LBB12_4 +; RV32ZBB-NEXT: sub t3, a2, a5 +; RV32ZBB-NEXT: sltu a5, t3, t2 +; RV32ZBB-NEXT: sub a5, a7, a5 +; RV32ZBB-NEXT: sub a7, t3, t2 +; RV32ZBB-NEXT: beq a5, a4, .LBB12_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a4, a6 +; RV32ZBB-NEXT: sltu t2, a4, a5 ; RV32ZBB-NEXT: j .LBB12_5 ; RV32ZBB-NEXT: .LBB12_4: -; RV32ZBB-NEXT: sltu t1, a2, a7 +; RV32ZBB-NEXT: sltu t2, a2, a7 ; RV32ZBB-NEXT: .LBB12_5: -; RV32ZBB-NEXT: sub a5, a1, a5 -; RV32ZBB-NEXT: sub a5, a5, t2 +; RV32ZBB-NEXT: sub a6, t1, a6 +; RV32ZBB-NEXT: sub a6, a6, a1 ; RV32ZBB-NEXT: sub a3, t0, a3 -; RV32ZBB-NEXT: beq a5, a1, .LBB12_7 +; RV32ZBB-NEXT: beq a6, t1, .LBB12_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, a5 +; RV32ZBB-NEXT: sltu a1, t1, a6 ; RV32ZBB-NEXT: j .LBB12_8 ; RV32ZBB-NEXT: .LBB12_7: ; RV32ZBB-NEXT: sltu a1, t0, a3 ; RV32ZBB-NEXT: .LBB12_8: -; RV32ZBB-NEXT: xor a4, a6, a4 +; RV32ZBB-NEXT: xor a4, a5, a4 ; RV32ZBB-NEXT: xor a2, a7, a2 ; RV32ZBB-NEXT: or a2, a2, a4 ; RV32ZBB-NEXT: beqz a2, .LBB12_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: mv a1, t1 +; RV32ZBB-NEXT: mv a1, t2 ; RV32ZBB-NEXT: .LBB12_10: ; RV32ZBB-NEXT: neg t0, a1 -; RV32ZBB-NEXT: xor a2, a7, t0 -; RV32ZBB-NEXT: xor a6, a6, t0 -; RV32ZBB-NEXT: xor a4, a3, t0 -; RV32ZBB-NEXT: sltu a3, a2, t0 -; RV32ZBB-NEXT: add a7, a6, a1 -; RV32ZBB-NEXT: sltu a6, a4, t0 -; RV32ZBB-NEXT: sub a3, a7, a3 -; RV32ZBB-NEXT: xor t1, a5, t0 -; RV32ZBB-NEXT: mv a7, a6 -; RV32ZBB-NEXT: beqz a5, .LBB12_12 -; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu a7, t1, t0 -; RV32ZBB-NEXT: .LBB12_12: +; RV32ZBB-NEXT: xor a4, a7, t0 +; RV32ZBB-NEXT: xor a2, a5, t0 +; RV32ZBB-NEXT: xor a5, a6, t0 +; RV32ZBB-NEXT: xor a3, a3, t0 +; RV32ZBB-NEXT: sltu a7, a4, t0 ; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: add t1, t1, a1 -; RV32ZBB-NEXT: add a1, a4, a1 -; RV32ZBB-NEXT: sltu a4, a2, a7 ; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a5, t1, a6 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sltu a7, a3, t0 +; RV32ZBB-NEXT: mv t1, a7 +; RV32ZBB-NEXT: beqz a6, .LBB12_12 +; RV32ZBB-NEXT: # %bb.11: +; RV32ZBB-NEXT: sltu t1, a5, t0 +; RV32ZBB-NEXT: .LBB12_12: +; RV32ZBB-NEXT: add a4, a4, a1 +; RV32ZBB-NEXT: add a5, a5, a1 +; RV32ZBB-NEXT: add a1, a3, a1 +; RV32ZBB-NEXT: sltu a3, a4, t1 +; RV32ZBB-NEXT: sub a4, a4, t1 +; RV32ZBB-NEXT: sub a5, a5, a7 +; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a2, 12(a0) 
; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_ext_i128_undef: @@ -1132,75 +1132,75 @@ define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_minmax_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a6, 4(a2) +; RV32I-NEXT: lw a5, 8(a2) ; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw t0, 0(a1) +; RV32I-NEXT: lw t1, 4(a1) ; RV32I-NEXT: lw a2, 8(a1) ; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: lw t0, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a2, a6 +; RV32I-NEXT: sltu a1, a2, a5 ; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t2, t0, a3 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, a5, .LBB17_2 +; RV32I-NEXT: sub a7, a7, a1 +; RV32I-NEXT: sltu a1, t0, a3 +; RV32I-NEXT: mv t2, a1 +; RV32I-NEXT: beq t1, a6, .LBB17_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a5 +; RV32I-NEXT: sltu t2, t1, a6 ; RV32I-NEXT: .LBB17_2: -; RV32I-NEXT: sub t3, a2, a6 -; RV32I-NEXT: sltu a6, t3, t1 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a7, t3, t1 -; RV32I-NEXT: beq a6, a4, .LBB17_4 +; RV32I-NEXT: sub t3, a2, a5 +; RV32I-NEXT: sltu a5, t3, t2 +; RV32I-NEXT: sub a5, a7, a5 +; RV32I-NEXT: sub a7, t3, t2 +; RV32I-NEXT: beq a5, a4, .LBB17_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a4, a6 +; RV32I-NEXT: sltu t2, a4, a5 ; RV32I-NEXT: j .LBB17_5 ; RV32I-NEXT: .LBB17_4: -; RV32I-NEXT: sltu t1, a2, a7 +; RV32I-NEXT: sltu t2, a2, a7 ; RV32I-NEXT: .LBB17_5: -; RV32I-NEXT: sub a5, a1, a5 -; RV32I-NEXT: sub a5, a5, t2 +; RV32I-NEXT: sub a6, t1, a6 +; RV32I-NEXT: sub a6, a6, a1 ; RV32I-NEXT: sub a3, t0, a3 -; RV32I-NEXT: beq a5, a1, .LBB17_7 +; RV32I-NEXT: beq a6, t1, .LBB17_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, a5 +; RV32I-NEXT: sltu a1, t1, a6 ; RV32I-NEXT: j .LBB17_8 ; RV32I-NEXT: .LBB17_7: ; RV32I-NEXT: sltu a1, t0, a3 ; RV32I-NEXT: .LBB17_8: -; RV32I-NEXT: xor a4, a6, a4 +; RV32I-NEXT: xor a4, a5, a4 ; RV32I-NEXT: xor a2, a7, a2 ; RV32I-NEXT: or a2, a2, a4 ; RV32I-NEXT: beqz a2, .LBB17_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a1, t1 +; RV32I-NEXT: mv a1, t2 ; RV32I-NEXT: .LBB17_10: ; RV32I-NEXT: neg t0, a1 -; RV32I-NEXT: xor a2, a7, t0 -; RV32I-NEXT: xor a6, a6, t0 -; RV32I-NEXT: xor a4, a3, t0 -; RV32I-NEXT: sltu a3, a2, t0 -; RV32I-NEXT: add a7, a6, a1 -; RV32I-NEXT: sltu a6, a4, t0 -; RV32I-NEXT: sub a3, a7, a3 -; RV32I-NEXT: xor t1, a5, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a5, .LBB17_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu a7, t1, t0 -; RV32I-NEXT: .LBB17_12: +; RV32I-NEXT: xor a4, a7, t0 +; RV32I-NEXT: xor a2, a5, t0 +; RV32I-NEXT: xor a5, a6, t0 +; RV32I-NEXT: xor a3, a3, t0 +; RV32I-NEXT: sltu a7, a4, t0 ; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: add t1, t1, a1 -; RV32I-NEXT: add a1, a4, a1 -; RV32I-NEXT: sltu a4, a2, a7 ; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a5, t1, a6 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sltu a7, a3, t0 +; RV32I-NEXT: mv t1, a7 +; RV32I-NEXT: beqz a6, .LBB17_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sltu t1, a5, t0 +; RV32I-NEXT: .LBB17_12: +; RV32I-NEXT: add a4, a4, a1 +; RV32I-NEXT: add a5, a5, a1 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: sltu a3, a4, t1 +; RV32I-NEXT: sub a4, a4, t1 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a2, a2, a3 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_minmax_i128: @@ -1228,75 +1228,75 @@ 
define i128 @abd_minmax_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_minmax_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) +; RV32ZBB-NEXT: lw a5, 8(a2) ; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw t0, 0(a1) +; RV32ZBB-NEXT: lw t1, 4(a1) ; RV32ZBB-NEXT: lw a2, 8(a1) ; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: lw t0, 0(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a2, a6 +; RV32ZBB-NEXT: sltu a1, a2, a5 ; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t2, t0, a3 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB17_2 +; RV32ZBB-NEXT: sub a7, a7, a1 +; RV32ZBB-NEXT: sltu a1, t0, a3 +; RV32ZBB-NEXT: mv t2, a1 +; RV32ZBB-NEXT: beq t1, a6, .LBB17_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a5 +; RV32ZBB-NEXT: sltu t2, t1, a6 ; RV32ZBB-NEXT: .LBB17_2: -; RV32ZBB-NEXT: sub t3, a2, a6 -; RV32ZBB-NEXT: sltu a6, t3, t1 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a7, t3, t1 -; RV32ZBB-NEXT: beq a6, a4, .LBB17_4 +; RV32ZBB-NEXT: sub t3, a2, a5 +; RV32ZBB-NEXT: sltu a5, t3, t2 +; RV32ZBB-NEXT: sub a5, a7, a5 +; RV32ZBB-NEXT: sub a7, t3, t2 +; RV32ZBB-NEXT: beq a5, a4, .LBB17_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a4, a6 +; RV32ZBB-NEXT: sltu t2, a4, a5 ; RV32ZBB-NEXT: j .LBB17_5 ; RV32ZBB-NEXT: .LBB17_4: -; RV32ZBB-NEXT: sltu t1, a2, a7 +; RV32ZBB-NEXT: sltu t2, a2, a7 ; RV32ZBB-NEXT: .LBB17_5: -; RV32ZBB-NEXT: sub a5, a1, a5 -; RV32ZBB-NEXT: sub a5, a5, t2 +; RV32ZBB-NEXT: sub a6, t1, a6 +; RV32ZBB-NEXT: sub a6, a6, a1 ; RV32ZBB-NEXT: sub a3, t0, a3 -; RV32ZBB-NEXT: beq a5, a1, .LBB17_7 +; RV32ZBB-NEXT: beq a6, t1, .LBB17_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, a5 +; RV32ZBB-NEXT: sltu a1, t1, a6 ; RV32ZBB-NEXT: j .LBB17_8 ; RV32ZBB-NEXT: .LBB17_7: ; RV32ZBB-NEXT: sltu a1, t0, a3 ; RV32ZBB-NEXT: .LBB17_8: -; RV32ZBB-NEXT: xor a4, a6, a4 +; RV32ZBB-NEXT: xor a4, a5, a4 ; RV32ZBB-NEXT: xor a2, a7, a2 ; RV32ZBB-NEXT: or a2, a2, a4 ; RV32ZBB-NEXT: beqz a2, .LBB17_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: mv a1, t1 +; RV32ZBB-NEXT: mv a1, t2 ; RV32ZBB-NEXT: .LBB17_10: ; RV32ZBB-NEXT: neg t0, a1 -; RV32ZBB-NEXT: xor a2, a7, t0 -; RV32ZBB-NEXT: xor a6, a6, t0 -; RV32ZBB-NEXT: xor a4, a3, t0 -; RV32ZBB-NEXT: sltu a3, a2, t0 -; RV32ZBB-NEXT: add a7, a6, a1 -; RV32ZBB-NEXT: sltu a6, a4, t0 -; RV32ZBB-NEXT: sub a3, a7, a3 -; RV32ZBB-NEXT: xor t1, a5, t0 -; RV32ZBB-NEXT: mv a7, a6 -; RV32ZBB-NEXT: beqz a5, .LBB17_12 -; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu a7, t1, t0 -; RV32ZBB-NEXT: .LBB17_12: +; RV32ZBB-NEXT: xor a4, a7, t0 +; RV32ZBB-NEXT: xor a2, a5, t0 +; RV32ZBB-NEXT: xor a5, a6, t0 +; RV32ZBB-NEXT: xor a3, a3, t0 +; RV32ZBB-NEXT: sltu a7, a4, t0 ; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: add t1, t1, a1 -; RV32ZBB-NEXT: add a1, a4, a1 -; RV32ZBB-NEXT: sltu a4, a2, a7 ; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a5, t1, a6 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sltu a7, a3, t0 +; RV32ZBB-NEXT: mv t1, a7 +; RV32ZBB-NEXT: beqz a6, .LBB17_12 +; RV32ZBB-NEXT: # %bb.11: +; RV32ZBB-NEXT: sltu t1, a5, t0 +; RV32ZBB-NEXT: .LBB17_12: +; RV32ZBB-NEXT: add a4, a4, a1 +; RV32ZBB-NEXT: add a5, a5, a1 +; RV32ZBB-NEXT: add a1, a3, a1 +; RV32ZBB-NEXT: sltu a3, a4, t1 +; RV32ZBB-NEXT: sub a4, a4, t1 +; RV32ZBB-NEXT: sub a5, a5, a7 +; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw 
a3, 12(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_minmax_i128: @@ -1525,75 +1525,75 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_cmp_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a6, 4(a2) +; RV32I-NEXT: lw a5, 8(a2) ; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw t0, 0(a1) +; RV32I-NEXT: lw t1, 4(a1) ; RV32I-NEXT: lw a2, 8(a1) ; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: lw t0, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a2, a6 +; RV32I-NEXT: sltu a1, a2, a5 ; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t2, t0, a3 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, a5, .LBB22_2 +; RV32I-NEXT: sub a7, a7, a1 +; RV32I-NEXT: sltu a1, t0, a3 +; RV32I-NEXT: mv t2, a1 +; RV32I-NEXT: beq t1, a6, .LBB22_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a5 +; RV32I-NEXT: sltu t2, t1, a6 ; RV32I-NEXT: .LBB22_2: -; RV32I-NEXT: sub t3, a2, a6 -; RV32I-NEXT: sltu a6, t3, t1 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a7, t3, t1 -; RV32I-NEXT: beq a6, a4, .LBB22_4 +; RV32I-NEXT: sub t3, a2, a5 +; RV32I-NEXT: sltu a5, t3, t2 +; RV32I-NEXT: sub a5, a7, a5 +; RV32I-NEXT: sub a7, t3, t2 +; RV32I-NEXT: beq a5, a4, .LBB22_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a4, a6 +; RV32I-NEXT: sltu t2, a4, a5 ; RV32I-NEXT: j .LBB22_5 ; RV32I-NEXT: .LBB22_4: -; RV32I-NEXT: sltu t1, a2, a7 +; RV32I-NEXT: sltu t2, a2, a7 ; RV32I-NEXT: .LBB22_5: -; RV32I-NEXT: sub a5, a1, a5 -; RV32I-NEXT: sub a5, a5, t2 +; RV32I-NEXT: sub a6, t1, a6 +; RV32I-NEXT: sub a6, a6, a1 ; RV32I-NEXT: sub a3, t0, a3 -; RV32I-NEXT: beq a5, a1, .LBB22_7 +; RV32I-NEXT: beq a6, t1, .LBB22_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, a5 +; RV32I-NEXT: sltu a1, t1, a6 ; RV32I-NEXT: j .LBB22_8 ; RV32I-NEXT: .LBB22_7: ; RV32I-NEXT: sltu a1, t0, a3 ; RV32I-NEXT: .LBB22_8: -; RV32I-NEXT: xor a4, a6, a4 +; RV32I-NEXT: xor a4, a5, a4 ; RV32I-NEXT: xor a2, a7, a2 ; RV32I-NEXT: or a2, a2, a4 ; RV32I-NEXT: beqz a2, .LBB22_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a1, t1 +; RV32I-NEXT: mv a1, t2 ; RV32I-NEXT: .LBB22_10: ; RV32I-NEXT: neg t0, a1 -; RV32I-NEXT: xor a2, a7, t0 -; RV32I-NEXT: xor a6, a6, t0 -; RV32I-NEXT: xor a4, a3, t0 -; RV32I-NEXT: sltu a3, a2, t0 -; RV32I-NEXT: add a7, a6, a1 -; RV32I-NEXT: sltu a6, a4, t0 -; RV32I-NEXT: sub a3, a7, a3 -; RV32I-NEXT: xor t1, a5, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a5, .LBB22_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu a7, t1, t0 -; RV32I-NEXT: .LBB22_12: +; RV32I-NEXT: xor a4, a7, t0 +; RV32I-NEXT: xor a2, a5, t0 +; RV32I-NEXT: xor a5, a6, t0 +; RV32I-NEXT: xor a3, a3, t0 +; RV32I-NEXT: sltu a7, a4, t0 ; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: add t1, t1, a1 -; RV32I-NEXT: add a1, a4, a1 -; RV32I-NEXT: sltu a4, a2, a7 ; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a5, t1, a6 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sltu a7, a3, t0 +; RV32I-NEXT: mv t1, a7 +; RV32I-NEXT: beqz a6, .LBB22_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sltu t1, a5, t0 +; RV32I-NEXT: .LBB22_12: +; RV32I-NEXT: add a4, a4, a1 +; RV32I-NEXT: add a5, a5, a1 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: sltu a3, a4, t1 +; RV32I-NEXT: sub a4, a4, t1 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a2, a2, a3 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a2, 12(a0) ; 
RV32I-NEXT: ret ; ; RV64I-LABEL: abd_cmp_i128: @@ -1621,75 +1621,75 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_cmp_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) +; RV32ZBB-NEXT: lw a5, 8(a2) ; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw t0, 0(a1) +; RV32ZBB-NEXT: lw t1, 4(a1) ; RV32ZBB-NEXT: lw a2, 8(a1) ; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: lw t0, 0(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a2, a6 +; RV32ZBB-NEXT: sltu a1, a2, a5 ; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t2, t0, a3 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB22_2 +; RV32ZBB-NEXT: sub a7, a7, a1 +; RV32ZBB-NEXT: sltu a1, t0, a3 +; RV32ZBB-NEXT: mv t2, a1 +; RV32ZBB-NEXT: beq t1, a6, .LBB22_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a5 +; RV32ZBB-NEXT: sltu t2, t1, a6 ; RV32ZBB-NEXT: .LBB22_2: -; RV32ZBB-NEXT: sub t3, a2, a6 -; RV32ZBB-NEXT: sltu a6, t3, t1 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a7, t3, t1 -; RV32ZBB-NEXT: beq a6, a4, .LBB22_4 +; RV32ZBB-NEXT: sub t3, a2, a5 +; RV32ZBB-NEXT: sltu a5, t3, t2 +; RV32ZBB-NEXT: sub a5, a7, a5 +; RV32ZBB-NEXT: sub a7, t3, t2 +; RV32ZBB-NEXT: beq a5, a4, .LBB22_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a4, a6 +; RV32ZBB-NEXT: sltu t2, a4, a5 ; RV32ZBB-NEXT: j .LBB22_5 ; RV32ZBB-NEXT: .LBB22_4: -; RV32ZBB-NEXT: sltu t1, a2, a7 +; RV32ZBB-NEXT: sltu t2, a2, a7 ; RV32ZBB-NEXT: .LBB22_5: -; RV32ZBB-NEXT: sub a5, a1, a5 -; RV32ZBB-NEXT: sub a5, a5, t2 +; RV32ZBB-NEXT: sub a6, t1, a6 +; RV32ZBB-NEXT: sub a6, a6, a1 ; RV32ZBB-NEXT: sub a3, t0, a3 -; RV32ZBB-NEXT: beq a5, a1, .LBB22_7 +; RV32ZBB-NEXT: beq a6, t1, .LBB22_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, a5 +; RV32ZBB-NEXT: sltu a1, t1, a6 ; RV32ZBB-NEXT: j .LBB22_8 ; RV32ZBB-NEXT: .LBB22_7: ; RV32ZBB-NEXT: sltu a1, t0, a3 ; RV32ZBB-NEXT: .LBB22_8: -; RV32ZBB-NEXT: xor a4, a6, a4 +; RV32ZBB-NEXT: xor a4, a5, a4 ; RV32ZBB-NEXT: xor a2, a7, a2 ; RV32ZBB-NEXT: or a2, a2, a4 ; RV32ZBB-NEXT: beqz a2, .LBB22_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: mv a1, t1 +; RV32ZBB-NEXT: mv a1, t2 ; RV32ZBB-NEXT: .LBB22_10: ; RV32ZBB-NEXT: neg t0, a1 -; RV32ZBB-NEXT: xor a2, a7, t0 -; RV32ZBB-NEXT: xor a6, a6, t0 -; RV32ZBB-NEXT: xor a4, a3, t0 -; RV32ZBB-NEXT: sltu a3, a2, t0 -; RV32ZBB-NEXT: add a7, a6, a1 -; RV32ZBB-NEXT: sltu a6, a4, t0 -; RV32ZBB-NEXT: sub a3, a7, a3 -; RV32ZBB-NEXT: xor t1, a5, t0 -; RV32ZBB-NEXT: mv a7, a6 -; RV32ZBB-NEXT: beqz a5, .LBB22_12 -; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu a7, t1, t0 -; RV32ZBB-NEXT: .LBB22_12: +; RV32ZBB-NEXT: xor a4, a7, t0 +; RV32ZBB-NEXT: xor a2, a5, t0 +; RV32ZBB-NEXT: xor a5, a6, t0 +; RV32ZBB-NEXT: xor a3, a3, t0 +; RV32ZBB-NEXT: sltu a7, a4, t0 ; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: add t1, t1, a1 -; RV32ZBB-NEXT: add a1, a4, a1 -; RV32ZBB-NEXT: sltu a4, a2, a7 ; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a5, t1, a6 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sltu a7, a3, t0 +; RV32ZBB-NEXT: mv t1, a7 +; RV32ZBB-NEXT: beqz a6, .LBB22_12 +; RV32ZBB-NEXT: # %bb.11: +; RV32ZBB-NEXT: sltu t1, a5, t0 +; RV32ZBB-NEXT: .LBB22_12: +; RV32ZBB-NEXT: add a4, a4, a1 +; RV32ZBB-NEXT: add a5, a5, a1 +; RV32ZBB-NEXT: add a1, a3, a1 +; RV32ZBB-NEXT: sltu a3, a4, t1 +; RV32ZBB-NEXT: sub a4, a4, t1 +; RV32ZBB-NEXT: sub a5, a5, a7 +; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: sw a1, 0(a0) ; 
RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_cmp_i128: @@ -1919,75 +1919,75 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_select_i128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a3, 0(a2) -; RV32I-NEXT: lw a5, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) +; RV32I-NEXT: lw a6, 4(a2) +; RV32I-NEXT: lw a5, 8(a2) ; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw t0, 0(a1) +; RV32I-NEXT: lw t1, 4(a1) ; RV32I-NEXT: lw a2, 8(a1) ; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: lw t0, 0(a1) -; RV32I-NEXT: lw a1, 4(a1) -; RV32I-NEXT: sltu t1, a2, a6 +; RV32I-NEXT: sltu a1, a2, a5 ; RV32I-NEXT: sub a7, a4, a7 -; RV32I-NEXT: sltu t2, t0, a3 -; RV32I-NEXT: sub a7, a7, t1 -; RV32I-NEXT: mv t1, t2 -; RV32I-NEXT: beq a1, a5, .LBB27_2 +; RV32I-NEXT: sub a7, a7, a1 +; RV32I-NEXT: sltu a1, t0, a3 +; RV32I-NEXT: mv t2, a1 +; RV32I-NEXT: beq t1, a6, .LBB27_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a1, a5 +; RV32I-NEXT: sltu t2, t1, a6 ; RV32I-NEXT: .LBB27_2: -; RV32I-NEXT: sub t3, a2, a6 -; RV32I-NEXT: sltu a6, t3, t1 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: sub a7, t3, t1 -; RV32I-NEXT: beq a6, a4, .LBB27_4 +; RV32I-NEXT: sub t3, a2, a5 +; RV32I-NEXT: sltu a5, t3, t2 +; RV32I-NEXT: sub a5, a7, a5 +; RV32I-NEXT: sub a7, t3, t2 +; RV32I-NEXT: beq a5, a4, .LBB27_4 ; RV32I-NEXT: # %bb.3: -; RV32I-NEXT: sltu t1, a4, a6 +; RV32I-NEXT: sltu t2, a4, a5 ; RV32I-NEXT: j .LBB27_5 ; RV32I-NEXT: .LBB27_4: -; RV32I-NEXT: sltu t1, a2, a7 +; RV32I-NEXT: sltu t2, a2, a7 ; RV32I-NEXT: .LBB27_5: -; RV32I-NEXT: sub a5, a1, a5 -; RV32I-NEXT: sub a5, a5, t2 +; RV32I-NEXT: sub a6, t1, a6 +; RV32I-NEXT: sub a6, a6, a1 ; RV32I-NEXT: sub a3, t0, a3 -; RV32I-NEXT: beq a5, a1, .LBB27_7 +; RV32I-NEXT: beq a6, t1, .LBB27_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: sltu a1, a1, a5 +; RV32I-NEXT: sltu a1, t1, a6 ; RV32I-NEXT: j .LBB27_8 ; RV32I-NEXT: .LBB27_7: ; RV32I-NEXT: sltu a1, t0, a3 ; RV32I-NEXT: .LBB27_8: -; RV32I-NEXT: xor a4, a6, a4 +; RV32I-NEXT: xor a4, a5, a4 ; RV32I-NEXT: xor a2, a7, a2 ; RV32I-NEXT: or a2, a2, a4 ; RV32I-NEXT: beqz a2, .LBB27_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a1, t1 +; RV32I-NEXT: mv a1, t2 ; RV32I-NEXT: .LBB27_10: ; RV32I-NEXT: neg t0, a1 -; RV32I-NEXT: xor a2, a7, t0 -; RV32I-NEXT: xor a6, a6, t0 -; RV32I-NEXT: xor a4, a3, t0 -; RV32I-NEXT: sltu a3, a2, t0 -; RV32I-NEXT: add a7, a6, a1 -; RV32I-NEXT: sltu a6, a4, t0 -; RV32I-NEXT: sub a3, a7, a3 -; RV32I-NEXT: xor t1, a5, t0 -; RV32I-NEXT: mv a7, a6 -; RV32I-NEXT: beqz a5, .LBB27_12 -; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: sltu a7, t1, t0 -; RV32I-NEXT: .LBB27_12: +; RV32I-NEXT: xor a4, a7, t0 +; RV32I-NEXT: xor a2, a5, t0 +; RV32I-NEXT: xor a5, a6, t0 +; RV32I-NEXT: xor a3, a3, t0 +; RV32I-NEXT: sltu a7, a4, t0 ; RV32I-NEXT: add a2, a2, a1 -; RV32I-NEXT: add t1, t1, a1 -; RV32I-NEXT: add a1, a4, a1 -; RV32I-NEXT: sltu a4, a2, a7 ; RV32I-NEXT: sub a2, a2, a7 -; RV32I-NEXT: sub a5, t1, a6 -; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sltu a7, a3, t0 +; RV32I-NEXT: mv t1, a7 +; RV32I-NEXT: beqz a6, .LBB27_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sltu t1, a5, t0 +; RV32I-NEXT: .LBB27_12: +; RV32I-NEXT: add a4, a4, a1 +; RV32I-NEXT: add a5, a5, a1 +; RV32I-NEXT: add a1, a3, a1 +; RV32I-NEXT: sltu a3, a4, t1 +; RV32I-NEXT: sub a4, a4, t1 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sub a2, a2, a3 ; RV32I-NEXT: sw a1, 0(a0) ; RV32I-NEXT: sw a5, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw 
a3, 12(a0) +; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a2, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_select_i128: @@ -2015,75 +2015,75 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; RV32ZBB-LABEL: abd_select_i128: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: lw a3, 0(a2) -; RV32ZBB-NEXT: lw a5, 4(a2) -; RV32ZBB-NEXT: lw a6, 8(a2) +; RV32ZBB-NEXT: lw a6, 4(a2) +; RV32ZBB-NEXT: lw a5, 8(a2) ; RV32ZBB-NEXT: lw a7, 12(a2) +; RV32ZBB-NEXT: lw t0, 0(a1) +; RV32ZBB-NEXT: lw t1, 4(a1) ; RV32ZBB-NEXT: lw a2, 8(a1) ; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: lw t0, 0(a1) -; RV32ZBB-NEXT: lw a1, 4(a1) -; RV32ZBB-NEXT: sltu t1, a2, a6 +; RV32ZBB-NEXT: sltu a1, a2, a5 ; RV32ZBB-NEXT: sub a7, a4, a7 -; RV32ZBB-NEXT: sltu t2, t0, a3 -; RV32ZBB-NEXT: sub a7, a7, t1 -; RV32ZBB-NEXT: mv t1, t2 -; RV32ZBB-NEXT: beq a1, a5, .LBB27_2 +; RV32ZBB-NEXT: sub a7, a7, a1 +; RV32ZBB-NEXT: sltu a1, t0, a3 +; RV32ZBB-NEXT: mv t2, a1 +; RV32ZBB-NEXT: beq t1, a6, .LBB27_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: sltu t1, a1, a5 +; RV32ZBB-NEXT: sltu t2, t1, a6 ; RV32ZBB-NEXT: .LBB27_2: -; RV32ZBB-NEXT: sub t3, a2, a6 -; RV32ZBB-NEXT: sltu a6, t3, t1 -; RV32ZBB-NEXT: sub a6, a7, a6 -; RV32ZBB-NEXT: sub a7, t3, t1 -; RV32ZBB-NEXT: beq a6, a4, .LBB27_4 +; RV32ZBB-NEXT: sub t3, a2, a5 +; RV32ZBB-NEXT: sltu a5, t3, t2 +; RV32ZBB-NEXT: sub a5, a7, a5 +; RV32ZBB-NEXT: sub a7, t3, t2 +; RV32ZBB-NEXT: beq a5, a4, .LBB27_4 ; RV32ZBB-NEXT: # %bb.3: -; RV32ZBB-NEXT: sltu t1, a4, a6 +; RV32ZBB-NEXT: sltu t2, a4, a5 ; RV32ZBB-NEXT: j .LBB27_5 ; RV32ZBB-NEXT: .LBB27_4: -; RV32ZBB-NEXT: sltu t1, a2, a7 +; RV32ZBB-NEXT: sltu t2, a2, a7 ; RV32ZBB-NEXT: .LBB27_5: -; RV32ZBB-NEXT: sub a5, a1, a5 -; RV32ZBB-NEXT: sub a5, a5, t2 +; RV32ZBB-NEXT: sub a6, t1, a6 +; RV32ZBB-NEXT: sub a6, a6, a1 ; RV32ZBB-NEXT: sub a3, t0, a3 -; RV32ZBB-NEXT: beq a5, a1, .LBB27_7 +; RV32ZBB-NEXT: beq a6, t1, .LBB27_7 ; RV32ZBB-NEXT: # %bb.6: -; RV32ZBB-NEXT: sltu a1, a1, a5 +; RV32ZBB-NEXT: sltu a1, t1, a6 ; RV32ZBB-NEXT: j .LBB27_8 ; RV32ZBB-NEXT: .LBB27_7: ; RV32ZBB-NEXT: sltu a1, t0, a3 ; RV32ZBB-NEXT: .LBB27_8: -; RV32ZBB-NEXT: xor a4, a6, a4 +; RV32ZBB-NEXT: xor a4, a5, a4 ; RV32ZBB-NEXT: xor a2, a7, a2 ; RV32ZBB-NEXT: or a2, a2, a4 ; RV32ZBB-NEXT: beqz a2, .LBB27_10 ; RV32ZBB-NEXT: # %bb.9: -; RV32ZBB-NEXT: mv a1, t1 +; RV32ZBB-NEXT: mv a1, t2 ; RV32ZBB-NEXT: .LBB27_10: ; RV32ZBB-NEXT: neg t0, a1 -; RV32ZBB-NEXT: xor a2, a7, t0 -; RV32ZBB-NEXT: xor a6, a6, t0 -; RV32ZBB-NEXT: xor a4, a3, t0 -; RV32ZBB-NEXT: sltu a3, a2, t0 -; RV32ZBB-NEXT: add a7, a6, a1 -; RV32ZBB-NEXT: sltu a6, a4, t0 -; RV32ZBB-NEXT: sub a3, a7, a3 -; RV32ZBB-NEXT: xor t1, a5, t0 -; RV32ZBB-NEXT: mv a7, a6 -; RV32ZBB-NEXT: beqz a5, .LBB27_12 -; RV32ZBB-NEXT: # %bb.11: -; RV32ZBB-NEXT: sltu a7, t1, t0 -; RV32ZBB-NEXT: .LBB27_12: +; RV32ZBB-NEXT: xor a4, a7, t0 +; RV32ZBB-NEXT: xor a2, a5, t0 +; RV32ZBB-NEXT: xor a5, a6, t0 +; RV32ZBB-NEXT: xor a3, a3, t0 +; RV32ZBB-NEXT: sltu a7, a4, t0 ; RV32ZBB-NEXT: add a2, a2, a1 -; RV32ZBB-NEXT: add t1, t1, a1 -; RV32ZBB-NEXT: add a1, a4, a1 -; RV32ZBB-NEXT: sltu a4, a2, a7 ; RV32ZBB-NEXT: sub a2, a2, a7 -; RV32ZBB-NEXT: sub a5, t1, a6 -; RV32ZBB-NEXT: sub a3, a3, a4 +; RV32ZBB-NEXT: sltu a7, a3, t0 +; RV32ZBB-NEXT: mv t1, a7 +; RV32ZBB-NEXT: beqz a6, .LBB27_12 +; RV32ZBB-NEXT: # %bb.11: +; RV32ZBB-NEXT: sltu t1, a5, t0 +; RV32ZBB-NEXT: .LBB27_12: +; RV32ZBB-NEXT: add a4, a4, a1 +; RV32ZBB-NEXT: add a5, a5, a1 +; RV32ZBB-NEXT: add a1, a3, a1 +; RV32ZBB-NEXT: sltu a3, a4, t1 +; RV32ZBB-NEXT: sub a4, a4, t1 +; RV32ZBB-NEXT: sub a5, a5, 
a7 +; RV32ZBB-NEXT: sub a2, a2, a3 ; RV32ZBB-NEXT: sw a1, 0(a0) ; RV32ZBB-NEXT: sw a5, 4(a0) -; RV32ZBB-NEXT: sw a2, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a2, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64ZBB-LABEL: abd_select_i128: diff --git a/llvm/test/CodeGen/RISCV/add-before-shl.ll b/llvm/test/CodeGen/RISCV/add-before-shl.ll index 5d4478f9d4b5f..533482e9fdeb4 100644 --- a/llvm/test/CodeGen/RISCV/add-before-shl.ll +++ b/llvm/test/CodeGen/RISCV/add-before-shl.ll @@ -200,26 +200,26 @@ define i128 @add_wide_operand(i128 %a) nounwind { ; ; RV32C-LABEL: add_wide_operand: ; RV32C: # %bb.0: -; RV32C-NEXT: c.lw a4, 12(a1) -; RV32C-NEXT: c.lw a3, 0(a1) +; RV32C-NEXT: lw a6, 0(a1) ; RV32C-NEXT: c.lw a2, 4(a1) -; RV32C-NEXT: c.lw a1, 8(a1) +; RV32C-NEXT: c.lw a4, 8(a1) +; RV32C-NEXT: c.lw a1, 12(a1) ; RV32C-NEXT: c.lui a5, 16 -; RV32C-NEXT: add a6, a4, a5 -; RV32C-NEXT: srli a5, a3, 29 -; RV32C-NEXT: slli a4, a2, 3 -; RV32C-NEXT: c.or a4, a5 -; RV32C-NEXT: srli a5, a1, 29 +; RV32C-NEXT: c.add a1, a5 +; RV32C-NEXT: srli a5, a6, 29 +; RV32C-NEXT: slli a3, a2, 3 +; RV32C-NEXT: c.or a3, a5 +; RV32C-NEXT: srli a5, a4, 29 ; RV32C-NEXT: c.srli a2, 29 -; RV32C-NEXT: c.slli a1, 3 -; RV32C-NEXT: c.slli a3, 3 +; RV32C-NEXT: c.slli a4, 3 ; RV32C-NEXT: c.slli a6, 3 -; RV32C-NEXT: c.or a1, a2 -; RV32C-NEXT: or a2, a6, a5 -; RV32C-NEXT: c.sw a3, 0(a0) -; RV32C-NEXT: c.sw a4, 4(a0) -; RV32C-NEXT: c.sw a1, 8(a0) -; RV32C-NEXT: c.sw a2, 12(a0) +; RV32C-NEXT: c.slli a1, 3 +; RV32C-NEXT: c.or a2, a4 +; RV32C-NEXT: c.or a1, a5 +; RV32C-NEXT: sw a6, 0(a0) +; RV32C-NEXT: c.sw a3, 4(a0) +; RV32C-NEXT: c.sw a2, 8(a0) +; RV32C-NEXT: c.sw a1, 12(a0) ; RV32C-NEXT: c.jr ra ; ; RV64C-LABEL: add_wide_operand: diff --git a/llvm/test/CodeGen/RISCV/add-imm.ll b/llvm/test/CodeGen/RISCV/add-imm.ll index 84deb4c00ac8d..21597beb0c483 100644 --- a/llvm/test/CodeGen/RISCV/add-imm.ll +++ b/llvm/test/CodeGen/RISCV/add-imm.ll @@ -214,28 +214,28 @@ define void @add32_reject() nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lui a0, %hi(ga) ; RV32I-NEXT: lui a1, %hi(gb) -; RV32I-NEXT: lw a2, %lo(ga)(a0) -; RV32I-NEXT: lw a3, %lo(gb)(a1) -; RV32I-NEXT: lui a4, 1 -; RV32I-NEXT: addi a4, a4, -1096 -; RV32I-NEXT: add a2, a2, a4 -; RV32I-NEXT: add a3, a3, a4 -; RV32I-NEXT: sw a2, %lo(ga)(a0) -; RV32I-NEXT: sw a3, %lo(gb)(a1) +; RV32I-NEXT: lui a2, 1 +; RV32I-NEXT: lw a3, %lo(ga)(a0) +; RV32I-NEXT: lw a4, %lo(gb)(a1) +; RV32I-NEXT: addi a2, a2, -1096 +; RV32I-NEXT: add a3, a3, a2 +; RV32I-NEXT: add a2, a4, a2 +; RV32I-NEXT: sw a3, %lo(ga)(a0) +; RV32I-NEXT: sw a2, %lo(gb)(a1) ; RV32I-NEXT: ret ; ; RV64I-LABEL: add32_reject: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a0, %hi(ga) ; RV64I-NEXT: lui a1, %hi(gb) -; RV64I-NEXT: lw a2, %lo(ga)(a0) -; RV64I-NEXT: lw a3, %lo(gb)(a1) -; RV64I-NEXT: lui a4, 1 -; RV64I-NEXT: addi a4, a4, -1096 -; RV64I-NEXT: add a2, a2, a4 -; RV64I-NEXT: add a3, a3, a4 -; RV64I-NEXT: sw a2, %lo(ga)(a0) -; RV64I-NEXT: sw a3, %lo(gb)(a1) +; RV64I-NEXT: lui a2, 1 +; RV64I-NEXT: lw a3, %lo(ga)(a0) +; RV64I-NEXT: lw a4, %lo(gb)(a1) +; RV64I-NEXT: addi a2, a2, -1096 +; RV64I-NEXT: add a3, a3, a2 +; RV64I-NEXT: add a2, a4, a2 +; RV64I-NEXT: sw a3, %lo(ga)(a0) +; RV64I-NEXT: sw a2, %lo(gb)(a1) ; RV64I-NEXT: ret %1 = load i32, ptr @ga, align 4 %2 = load i32, ptr @gb, align 4 diff --git a/llvm/test/CodeGen/RISCV/alloca.ll b/llvm/test/CodeGen/RISCV/alloca.ll index 975fc93c830af..2463cd229ee7d 100644 --- a/llvm/test/CodeGen/RISCV/alloca.ll +++ b/llvm/test/CodeGen/RISCV/alloca.ll @@ -76,21 +76,21 @@ define void 
@alloca_callframe(i32 %n) nounwind { ; RV32I-NEXT: sub a0, sp, a0 ; RV32I-NEXT: mv sp, a0 ; RV32I-NEXT: addi sp, sp, -16 -; RV32I-NEXT: li t0, 12 -; RV32I-NEXT: li t1, 11 -; RV32I-NEXT: li t2, 10 -; RV32I-NEXT: li t3, 9 +; RV32I-NEXT: li a7, 12 +; RV32I-NEXT: li t0, 11 +; RV32I-NEXT: li t1, 10 +; RV32I-NEXT: li t2, 9 ; RV32I-NEXT: li a1, 2 ; RV32I-NEXT: li a2, 3 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 6 ; RV32I-NEXT: li a6, 7 +; RV32I-NEXT: sw t2, 0(sp) +; RV32I-NEXT: sw t1, 4(sp) +; RV32I-NEXT: sw t0, 8(sp) +; RV32I-NEXT: sw a7, 12(sp) ; RV32I-NEXT: li a7, 8 -; RV32I-NEXT: sw t3, 0(sp) -; RV32I-NEXT: sw t2, 4(sp) -; RV32I-NEXT: sw t1, 8(sp) -; RV32I-NEXT: sw t0, 12(sp) ; RV32I-NEXT: call func ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: addi sp, s0, -16 diff --git a/llvm/test/CodeGen/RISCV/alu64.ll b/llvm/test/CodeGen/RISCV/alu64.ll index f032756e007b6..8d393e894e69d 100644 --- a/llvm/test/CodeGen/RISCV/alu64.ll +++ b/llvm/test/CodeGen/RISCV/alu64.ll @@ -206,8 +206,8 @@ define i64 @sll(i64 %a, i64 %b) nounwind { ; ; RV32I-LABEL: sll: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: sll a3, a0, a2 +; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: bltz a4, .LBB11_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a1, a3 @@ -293,8 +293,8 @@ define i64 @srl(i64 %a, i64 %b) nounwind { ; ; RV32I-LABEL: srl: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: srl a3, a1, a2 +; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: bltz a4, .LBB15_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a0, a3 @@ -322,13 +322,12 @@ define i64 @sra(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: sra: ; RV32I: # %bb.0: ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: sra a1, a1, a2 +; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: bltz a4, .LBB16_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srai a3, a3, 31 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: srai a1, a3, 31 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB16_2: ; RV32I-NEXT: srl a0, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll index 8534ad379ebab..4abc125ce58eb 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw-discard.ll @@ -192,41 +192,41 @@ define void @amomax_d_discard(ptr %a, i64 %b) nounwind { ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a2 -; RV32-NEXT: mv s1, a0 -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a5, 4(a0) -; RV32-NEXT: mv s2, a1 +; RV32-NEXT: mv s1, a1 +; RV32-NEXT: mv s2, a0 +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a4, 4(a0) ; RV32-NEXT: j .LBB11_2 ; RV32-NEXT: .LBB11_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV32-NEXT: sw a4, 8(sp) -; RV32-NEXT: sw a5, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a4, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 -; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a0, s2 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a4, 12(sp) ; RV32-NEXT: bnez a0, .LBB11_6 ; RV32-NEXT: .LBB11_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beq a5, s0, .LBB11_4 +; RV32-NEXT: beq a4, s0, .LBB11_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV32-NEXT: slt a0, s0, a5 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: slt a0, s0, a4 +; RV32-NEXT: 
mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: bnez a0, .LBB11_1 ; RV32-NEXT: j .LBB11_5 ; RV32-NEXT: .LBB11_4: # in Loop: Header=BB11_2 Depth=1 -; RV32-NEXT: sltu a0, s2, a4 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: sltu a0, s1, a1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: bnez a0, .LBB11_1 ; RV32-NEXT: .LBB11_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV32-NEXT: mv a2, s2 +; RV32-NEXT: mv a2, s1 ; RV32-NEXT: mv a3, s0 ; RV32-NEXT: j .LBB11_1 ; RV32-NEXT: .LBB11_6: # %atomicrmw.end @@ -268,41 +268,41 @@ define void @amomaxu_d_discard(ptr %a, i64 %b) nounwind { ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a2 -; RV32-NEXT: mv s1, a0 -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a5, 4(a0) -; RV32-NEXT: mv s2, a1 +; RV32-NEXT: mv s1, a1 +; RV32-NEXT: mv s2, a0 +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a4, 4(a0) ; RV32-NEXT: j .LBB13_2 ; RV32-NEXT: .LBB13_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV32-NEXT: sw a4, 8(sp) -; RV32-NEXT: sw a5, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a4, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 -; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a0, s2 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a4, 12(sp) ; RV32-NEXT: bnez a0, .LBB13_6 ; RV32-NEXT: .LBB13_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beq a5, s0, .LBB13_4 +; RV32-NEXT: beq a4, s0, .LBB13_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV32-NEXT: sltu a0, s0, a5 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: sltu a0, s0, a4 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: bnez a0, .LBB13_1 ; RV32-NEXT: j .LBB13_5 ; RV32-NEXT: .LBB13_4: # in Loop: Header=BB13_2 Depth=1 -; RV32-NEXT: sltu a0, s2, a4 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: sltu a0, s1, a1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: bnez a0, .LBB13_1 ; RV32-NEXT: .LBB13_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV32-NEXT: mv a2, s2 +; RV32-NEXT: mv a2, s1 ; RV32-NEXT: mv a3, s0 ; RV32-NEXT: j .LBB13_1 ; RV32-NEXT: .LBB13_6: # %atomicrmw.end @@ -344,41 +344,41 @@ define void @amomin_d_discard(ptr %a, i64 %b) nounwind { ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a2 -; RV32-NEXT: mv s1, a0 -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a5, 4(a0) -; RV32-NEXT: mv s2, a1 +; RV32-NEXT: mv s1, a1 +; RV32-NEXT: mv s2, a0 +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a4, 4(a0) ; RV32-NEXT: j .LBB15_2 ; RV32-NEXT: .LBB15_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1 -; RV32-NEXT: sw a4, 8(sp) -; RV32-NEXT: sw a5, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a4, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 -; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a0, s2 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a4, 12(sp) ; RV32-NEXT: bnez a0, .LBB15_6 ; RV32-NEXT: .LBB15_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beq a5, s0, .LBB15_4 +; RV32-NEXT: beq a4, s0, .LBB15_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; 
RV32-NEXT: # in Loop: Header=BB15_2 Depth=1 -; RV32-NEXT: slt a0, s0, a5 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: slt a0, s0, a4 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: beqz a0, .LBB15_1 ; RV32-NEXT: j .LBB15_5 ; RV32-NEXT: .LBB15_4: # in Loop: Header=BB15_2 Depth=1 -; RV32-NEXT: sltu a0, s2, a4 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: sltu a0, s1, a1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: beqz a0, .LBB15_1 ; RV32-NEXT: .LBB15_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB15_2 Depth=1 -; RV32-NEXT: mv a2, s2 +; RV32-NEXT: mv a2, s1 ; RV32-NEXT: mv a3, s0 ; RV32-NEXT: j .LBB15_1 ; RV32-NEXT: .LBB15_6: # %atomicrmw.end @@ -420,41 +420,41 @@ define void @amominu_d_discard(ptr %a, i64 %b) nounwind { ; RV32-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a2 -; RV32-NEXT: mv s1, a0 -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a5, 4(a0) -; RV32-NEXT: mv s2, a1 +; RV32-NEXT: mv s1, a1 +; RV32-NEXT: mv s2, a0 +; RV32-NEXT: lw a1, 0(a0) +; RV32-NEXT: lw a4, 4(a0) ; RV32-NEXT: j .LBB17_2 ; RV32-NEXT: .LBB17_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1 -; RV32-NEXT: sw a4, 8(sp) -; RV32-NEXT: sw a5, 12(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a4, 12(sp) ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 -; RV32-NEXT: mv a0, s1 +; RV32-NEXT: mv a0, s2 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 8(sp) -; RV32-NEXT: lw a5, 12(sp) +; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a4, 12(sp) ; RV32-NEXT: bnez a0, .LBB17_6 ; RV32-NEXT: .LBB17_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: beq a5, s0, .LBB17_4 +; RV32-NEXT: beq a4, s0, .LBB17_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1 -; RV32-NEXT: sltu a0, s0, a5 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: sltu a0, s0, a4 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: beqz a0, .LBB17_1 ; RV32-NEXT: j .LBB17_5 ; RV32-NEXT: .LBB17_4: # in Loop: Header=BB17_2 Depth=1 -; RV32-NEXT: sltu a0, s2, a4 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: mv a3, a5 +; RV32-NEXT: sltu a0, s1, a1 +; RV32-NEXT: mv a2, a1 +; RV32-NEXT: mv a3, a4 ; RV32-NEXT: beqz a0, .LBB17_1 ; RV32-NEXT: .LBB17_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB17_2 Depth=1 -; RV32-NEXT: mv a2, s2 +; RV32-NEXT: mv a2, s1 ; RV32-NEXT: mv a3, s0 ; RV32-NEXT: j .LBB17_1 ; RV32-NEXT: .LBB17_6: # %atomicrmw.end diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll index 81518541477a8..95cd49ff9611d 100644 --- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll +++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll @@ -5352,34 +5352,34 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s2, a0, 24 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srai s2, a1, 24 ; RV32I-NEXT: j .LBB45_2 ; RV32I-NEXT: .LBB45_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 -; RV32I-NEXT: mv a0, s0 +; 
RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB45_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB45_4 ; RV32I-NEXT: .LBB45_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 24 -; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s2, a0, .LBB45_1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s2, a1, .LBB45_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB45_1 ; RV32I-NEXT: .LBB45_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -5423,34 +5423,34 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s2, a0, 56 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: srai s2, a1, 56 ; RV64I-NEXT: j .LBB45_2 ; RV64I-NEXT: .LBB45_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB45_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB45_4 ; RV64I-NEXT: .LBB45_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 56 -; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a0, .LBB45_1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a1, .LBB45_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB45_1 ; RV64I-NEXT: .LBB45_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -5537,34 +5537,34 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s2, a0, 24 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srai s2, a1, 24 ; RV32I-NEXT: j .LBB46_2 ; RV32I-NEXT: .LBB46_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; 
RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB46_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB46_4 ; RV32I-NEXT: .LBB46_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 24 -; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s2, a0, .LBB46_1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s2, a1, .LBB46_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB46_1 ; RV32I-NEXT: .LBB46_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -5637,34 +5637,34 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s2, a0, 56 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: srai s2, a1, 56 ; RV64I-NEXT: j .LBB46_2 ; RV64I-NEXT: .LBB46_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB46_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB46_4 ; RV64I-NEXT: .LBB46_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 56 -; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a0, .LBB46_1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a1, .LBB46_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB46_1 ; RV64I-NEXT: .LBB46_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -5809,34 +5809,34 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s2, a0, 24 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srai s2, a1, 24 ; RV32I-NEXT: j .LBB47_2 ; RV32I-NEXT: .LBB47_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB47_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; 
RV32I-NEXT: bnez a0, .LBB47_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB47_4 ; RV32I-NEXT: .LBB47_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 24 -; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s2, a0, .LBB47_1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s2, a1, .LBB47_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB47_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB47_1 ; RV32I-NEXT: .LBB47_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -5909,34 +5909,34 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s2, a0, 56 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: srai s2, a1, 56 ; RV64I-NEXT: j .LBB47_2 ; RV64I-NEXT: .LBB47_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB47_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB47_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB47_4 ; RV64I-NEXT: .LBB47_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 56 -; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a0, .LBB47_1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a1, .LBB47_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB47_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB47_1 ; RV64I-NEXT: .LBB47_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -6081,34 +6081,34 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s2, a0, 24 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srai s2, a1, 24 ; RV32I-NEXT: j .LBB48_2 ; RV32I-NEXT: .LBB48_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB48_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB48_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 
15(sp) +; RV32I-NEXT: bnez a1, .LBB48_4 ; RV32I-NEXT: .LBB48_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 24 -; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s2, a0, .LBB48_1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s2, a1, .LBB48_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB48_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB48_1 ; RV32I-NEXT: .LBB48_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -6181,34 +6181,34 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s2, a0, 56 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: srai s2, a1, 56 ; RV64I-NEXT: j .LBB48_2 ; RV64I-NEXT: .LBB48_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB48_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB48_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB48_4 ; RV64I-NEXT: .LBB48_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 56 -; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a0, .LBB48_1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a1, .LBB48_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB48_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB48_1 ; RV64I-NEXT: .LBB48_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -6353,34 +6353,34 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s2, a0, 24 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srai s2, a1, 24 ; RV32I-NEXT: j .LBB49_2 ; RV32I-NEXT: .LBB49_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB49_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB49_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB49_4 ; RV32I-NEXT: .LBB49_2: # 
%atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 24 -; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s2, a0, .LBB49_1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s2, a1, .LBB49_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB49_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB49_1 ; RV32I-NEXT: .LBB49_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -6424,34 +6424,34 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s2, a0, 56 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: srai s2, a1, 56 ; RV64I-NEXT: j .LBB49_2 ; RV64I-NEXT: .LBB49_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB49_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB49_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB49_4 ; RV64I-NEXT: .LBB49_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 56 -; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a0, .LBB49_1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a1, .LBB49_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB49_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB49_1 ; RV64I-NEXT: .LBB49_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -6538,34 +6538,34 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s2, a0, 24 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srai s2, a1, 24 ; RV32I-NEXT: j .LBB50_2 ; RV32I-NEXT: .LBB50_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB50_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB50_4 ; RV32I-NEXT: .LBB50_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; 
RV32I-NEXT: slli a0, a3, 24 -; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s2, a0, .LBB50_1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s2, a1, .LBB50_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB50_1 ; RV32I-NEXT: .LBB50_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -6609,34 +6609,34 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s2, a0, 56 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: srai s2, a1, 56 ; RV64I-NEXT: j .LBB50_2 ; RV64I-NEXT: .LBB50_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB50_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB50_4 ; RV64I-NEXT: .LBB50_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 56 -; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a0, .LBB50_1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a1, .LBB50_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB50_1 ; RV64I-NEXT: .LBB50_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -6723,34 +6723,34 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s2, a0, 24 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srai s2, a1, 24 ; RV32I-NEXT: j .LBB51_2 ; RV32I-NEXT: .LBB51_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB51_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB51_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB51_4 ; RV32I-NEXT: .LBB51_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 24 -; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv 
a2, a3 -; RV32I-NEXT: bge s2, a0, .LBB51_1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s2, a1, .LBB51_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB51_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB51_1 ; RV32I-NEXT: .LBB51_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -6823,34 +6823,34 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s2, a0, 56 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: srai s2, a1, 56 ; RV64I-NEXT: j .LBB51_2 ; RV64I-NEXT: .LBB51_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB51_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB51_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB51_4 ; RV64I-NEXT: .LBB51_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 56 -; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a0, .LBB51_1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a1, .LBB51_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB51_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB51_1 ; RV64I-NEXT: .LBB51_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -6995,34 +6995,34 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s2, a0, 24 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srai s2, a1, 24 ; RV32I-NEXT: j .LBB52_2 ; RV32I-NEXT: .LBB52_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB52_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB52_4 ; RV32I-NEXT: .LBB52_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 24 -; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s2, a0, .LBB52_1 +; RV32I-NEXT: slli a1, a0, 24 +; 
RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s2, a1, .LBB52_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB52_1 ; RV32I-NEXT: .LBB52_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -7095,34 +7095,34 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s2, a0, 56 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: srai s2, a1, 56 ; RV64I-NEXT: j .LBB52_2 ; RV64I-NEXT: .LBB52_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB52_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB52_4 ; RV64I-NEXT: .LBB52_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 56 -; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a0, .LBB52_1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a1, .LBB52_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB52_1 ; RV64I-NEXT: .LBB52_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -7267,34 +7267,34 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s2, a0, 24 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srai s2, a1, 24 ; RV32I-NEXT: j .LBB53_2 ; RV32I-NEXT: .LBB53_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB53_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB53_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB53_4 ; RV32I-NEXT: .LBB53_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 24 -; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s2, a0, .LBB53_1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s2, a1, 
.LBB53_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB53_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB53_1 ; RV32I-NEXT: .LBB53_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -7367,34 +7367,34 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s2, a0, 56 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: srai s2, a1, 56 ; RV64I-NEXT: j .LBB53_2 ; RV64I-NEXT: .LBB53_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB53_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB53_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB53_4 ; RV64I-NEXT: .LBB53_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 56 -; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a0, .LBB53_1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a1, .LBB53_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB53_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB53_1 ; RV64I-NEXT: .LBB53_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -7539,34 +7539,34 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 24 -; RV32I-NEXT: srai s2, a0, 24 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srai s2, a1, 24 ; RV32I-NEXT: j .LBB54_2 ; RV32I-NEXT: .LBB54_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB54_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB54_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB54_4 ; RV32I-NEXT: .LBB54_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 24 -; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s2, a0, .LBB54_1 +; RV32I-NEXT: slli a1, a0, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s2, a1, .LBB54_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: 
Header=BB54_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB54_1 ; RV32I-NEXT: .LBB54_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -7610,34 +7610,34 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 56 -; RV64I-NEXT: srai s2, a0, 56 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: srai s2, a1, 56 ; RV64I-NEXT: j .LBB54_2 ; RV64I-NEXT: .LBB54_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB54_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB54_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB54_4 ; RV64I-NEXT: .LBB54_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 56 -; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a0, .LBB54_1 +; RV64I-NEXT: slli a1, a0, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a1, .LBB54_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB54_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB54_1 ; RV64I-NEXT: .LBB54_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -7724,32 +7724,32 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB55_2 ; RV32I-NEXT: .LBB55_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB55_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB55_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB55_4 ; RV32I-NEXT: .LBB55_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s2, a0, .LBB55_1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s2, a1, .LBB55_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB55_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB55_1 ; RV32I-NEXT: .LBB55_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: 
lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -7788,32 +7788,32 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB55_2 ; RV64I-NEXT: .LBB55_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB55_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB55_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB55_4 ; RV64I-NEXT: .LBB55_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s2, a0, .LBB55_1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s2, a1, .LBB55_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB55_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB55_1 ; RV64I-NEXT: .LBB55_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -7890,32 +7890,32 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB56_2 ; RV32I-NEXT: .LBB56_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB56_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB56_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB56_4 ; RV32I-NEXT: .LBB56_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s2, a0, .LBB56_1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s2, a1, .LBB56_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB56_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB56_1 ; RV32I-NEXT: .LBB56_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -7978,32 +7978,32 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) 
-; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB56_2 ; RV64I-NEXT: .LBB56_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB56_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB56_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB56_4 ; RV64I-NEXT: .LBB56_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s2, a0, .LBB56_1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s2, a1, .LBB56_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB56_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB56_1 ; RV64I-NEXT: .LBB56_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -8128,32 +8128,32 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB57_2 ; RV32I-NEXT: .LBB57_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB57_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB57_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB57_4 ; RV32I-NEXT: .LBB57_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s2, a0, .LBB57_1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s2, a1, .LBB57_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB57_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB57_1 ; RV32I-NEXT: .LBB57_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -8216,32 +8216,32 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB57_2 ; RV64I-NEXT: .LBB57_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB57_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv 
a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB57_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB57_4 ; RV64I-NEXT: .LBB57_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s2, a0, .LBB57_1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s2, a1, .LBB57_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB57_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB57_1 ; RV64I-NEXT: .LBB57_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -8366,32 +8366,32 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB58_2 ; RV32I-NEXT: .LBB58_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB58_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB58_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB58_4 ; RV32I-NEXT: .LBB58_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s2, a0, .LBB58_1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s2, a1, .LBB58_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB58_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB58_1 ; RV32I-NEXT: .LBB58_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -8454,32 +8454,32 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB58_2 ; RV64I-NEXT: .LBB58_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB58_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB58_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB58_4 ; RV64I-NEXT: .LBB58_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; 
RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s2, a0, .LBB58_1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s2, a1, .LBB58_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB58_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB58_1 ; RV64I-NEXT: .LBB58_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -8604,32 +8604,32 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB59_2 ; RV32I-NEXT: .LBB59_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB59_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB59_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB59_4 ; RV32I-NEXT: .LBB59_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s2, a0, .LBB59_1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s2, a1, .LBB59_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB59_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB59_1 ; RV32I-NEXT: .LBB59_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -8668,32 +8668,32 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB59_2 ; RV64I-NEXT: .LBB59_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB59_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB59_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB59_4 ; RV64I-NEXT: .LBB59_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s2, a0, .LBB59_1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s2, a1, .LBB59_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB59_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB59_1 ; RV64I-NEXT: .LBB59_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, 
a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -8770,32 +8770,32 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB60_2 ; RV32I-NEXT: .LBB60_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB60_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB60_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB60_4 ; RV32I-NEXT: .LBB60_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s2, a0, .LBB60_1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s2, a1, .LBB60_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB60_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB60_1 ; RV32I-NEXT: .LBB60_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -8834,32 +8834,32 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB60_2 ; RV64I-NEXT: .LBB60_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB60_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB60_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB60_4 ; RV64I-NEXT: .LBB60_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s2, a0, .LBB60_1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s2, a1, .LBB60_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB60_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB60_1 ; RV64I-NEXT: .LBB60_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -8936,32 +8936,32 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 
4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB61_2 ; RV32I-NEXT: .LBB61_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB61_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB61_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB61_4 ; RV32I-NEXT: .LBB61_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s2, a0, .LBB61_1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s2, a1, .LBB61_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB61_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB61_1 ; RV32I-NEXT: .LBB61_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -9024,32 +9024,32 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB61_2 ; RV64I-NEXT: .LBB61_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB61_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB61_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB61_4 ; RV64I-NEXT: .LBB61_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s2, a0, .LBB61_1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s2, a1, .LBB61_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB61_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB61_1 ; RV64I-NEXT: .LBB61_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -9174,32 +9174,32 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB62_2 ; RV32I-NEXT: .LBB62_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB62_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 
15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB62_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB62_4 ; RV32I-NEXT: .LBB62_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s2, a0, .LBB62_1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s2, a1, .LBB62_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB62_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB62_1 ; RV32I-NEXT: .LBB62_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -9262,32 +9262,32 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB62_2 ; RV64I-NEXT: .LBB62_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB62_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB62_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB62_4 ; RV64I-NEXT: .LBB62_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s2, a0, .LBB62_1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s2, a1, .LBB62_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB62_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB62_1 ; RV64I-NEXT: .LBB62_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -9412,32 +9412,32 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB63_2 ; RV32I-NEXT: .LBB63_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB63_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB63_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB63_4 ; RV32I-NEXT: .LBB63_2: # %atomicrmw.start ; 
RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s2, a0, .LBB63_1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s2, a1, .LBB63_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB63_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB63_1 ; RV32I-NEXT: .LBB63_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -9500,32 +9500,32 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB63_2 ; RV64I-NEXT: .LBB63_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB63_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB63_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB63_4 ; RV64I-NEXT: .LBB63_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s2, a0, .LBB63_1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s2, a1, .LBB63_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB63_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB63_1 ; RV64I-NEXT: .LBB63_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -9650,32 +9650,32 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB64_2 ; RV32I-NEXT: .LBB64_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB64_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB64_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB64_4 ; RV32I-NEXT: .LBB64_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s2, a0, .LBB64_1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s2, a1, .LBB64_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB64_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; 
RV32I-NEXT: j .LBB64_1 ; RV32I-NEXT: .LBB64_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -9714,32 +9714,32 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB64_2 ; RV64I-NEXT: .LBB64_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB64_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB64_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB64_4 ; RV64I-NEXT: .LBB64_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s2, a0, .LBB64_1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s2, a1, .LBB64_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB64_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB64_1 ; RV64I-NEXT: .LBB64_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -15381,34 +15381,34 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s2, a0, 16 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: srai s2, a1, 16 ; RV32I-NEXT: j .LBB110_2 ; RV32I-NEXT: .LBB110_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB110_2 Depth=1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: sh a0, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) -; RV32I-NEXT: bnez a0, .LBB110_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 14(sp) +; RV32I-NEXT: bnez a1, .LBB110_4 ; RV32I-NEXT: .LBB110_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 16 -; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s2, a0, .LBB110_1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s2, a1, .LBB110_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB110_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB110_1 ; RV32I-NEXT: .LBB110_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 
20(sp) # 4-byte Folded Reload @@ -15454,34 +15454,34 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s2, a0, 48 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: srai s2, a1, 48 ; RV64I-NEXT: j .LBB110_2 ; RV64I-NEXT: .LBB110_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB110_2 Depth=1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: bnez a0, .LBB110_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 14(sp) +; RV64I-NEXT: bnez a1, .LBB110_4 ; RV64I-NEXT: .LBB110_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 -; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a0, .LBB110_1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a1, .LBB110_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB110_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB110_1 ; RV64I-NEXT: .LBB110_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -15572,34 +15572,34 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s2, a0, 16 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: srai s2, a1, 16 ; RV32I-NEXT: j .LBB111_2 ; RV32I-NEXT: .LBB111_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB111_2 Depth=1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: sh a0, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) -; RV32I-NEXT: bnez a0, .LBB111_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 14(sp) +; RV32I-NEXT: bnez a1, .LBB111_4 ; RV32I-NEXT: .LBB111_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 16 -; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s2, a0, .LBB111_1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s2, a1, .LBB111_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB111_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB111_1 ; RV32I-NEXT: .LBB111_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -15676,34 
+15676,34 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s2, a0, 48 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: srai s2, a1, 48 ; RV64I-NEXT: j .LBB111_2 ; RV64I-NEXT: .LBB111_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB111_2 Depth=1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: bnez a0, .LBB111_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 14(sp) +; RV64I-NEXT: bnez a1, .LBB111_4 ; RV64I-NEXT: .LBB111_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 -; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a0, .LBB111_1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a1, .LBB111_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB111_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB111_1 ; RV64I-NEXT: .LBB111_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -15856,34 +15856,34 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s2, a0, 16 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: srai s2, a1, 16 ; RV32I-NEXT: j .LBB112_2 ; RV32I-NEXT: .LBB112_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB112_2 Depth=1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: sh a0, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) -; RV32I-NEXT: bnez a0, .LBB112_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 14(sp) +; RV32I-NEXT: bnez a1, .LBB112_4 ; RV32I-NEXT: .LBB112_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 16 -; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s2, a0, .LBB112_1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s2, a1, .LBB112_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB112_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB112_1 ; RV32I-NEXT: .LBB112_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -15960,34 +15960,34 @@ define i16 
@atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s2, a0, 48 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: srai s2, a1, 48 ; RV64I-NEXT: j .LBB112_2 ; RV64I-NEXT: .LBB112_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB112_2 Depth=1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: bnez a0, .LBB112_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 14(sp) +; RV64I-NEXT: bnez a1, .LBB112_4 ; RV64I-NEXT: .LBB112_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 -; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a0, .LBB112_1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a1, .LBB112_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB112_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB112_1 ; RV64I-NEXT: .LBB112_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -16140,34 +16140,34 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s2, a0, 16 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: srai s2, a1, 16 ; RV32I-NEXT: j .LBB113_2 ; RV32I-NEXT: .LBB113_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB113_2 Depth=1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: sh a0, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) -; RV32I-NEXT: bnez a0, .LBB113_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 14(sp) +; RV32I-NEXT: bnez a1, .LBB113_4 ; RV32I-NEXT: .LBB113_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 16 -; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s2, a0, .LBB113_1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s2, a1, .LBB113_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB113_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB113_1 ; RV32I-NEXT: .LBB113_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -16244,34 +16244,34 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind 
{ ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s2, a0, 48 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: srai s2, a1, 48 ; RV64I-NEXT: j .LBB113_2 ; RV64I-NEXT: .LBB113_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB113_2 Depth=1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: bnez a0, .LBB113_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 14(sp) +; RV64I-NEXT: bnez a1, .LBB113_4 ; RV64I-NEXT: .LBB113_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 -; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a0, .LBB113_1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a1, .LBB113_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB113_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB113_1 ; RV64I-NEXT: .LBB113_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -16424,34 +16424,34 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s2, a0, 16 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: srai s2, a1, 16 ; RV32I-NEXT: j .LBB114_2 ; RV32I-NEXT: .LBB114_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB114_2 Depth=1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: sh a0, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) -; RV32I-NEXT: bnez a0, .LBB114_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 14(sp) +; RV32I-NEXT: bnez a1, .LBB114_4 ; RV32I-NEXT: .LBB114_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 16 -; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s2, a0, .LBB114_1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s2, a1, .LBB114_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB114_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB114_1 ; RV32I-NEXT: .LBB114_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -16497,34 +16497,34 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill 
; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s2, a0, 48 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: srai s2, a1, 48 ; RV64I-NEXT: j .LBB114_2 ; RV64I-NEXT: .LBB114_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB114_2 Depth=1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: bnez a0, .LBB114_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 14(sp) +; RV64I-NEXT: bnez a1, .LBB114_4 ; RV64I-NEXT: .LBB114_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 -; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a0, .LBB114_1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a1, .LBB114_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB114_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB114_1 ; RV64I-NEXT: .LBB114_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -16615,34 +16615,34 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s2, a0, 16 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: srai s2, a1, 16 ; RV32I-NEXT: j .LBB115_2 ; RV32I-NEXT: .LBB115_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB115_2 Depth=1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: sh a0, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) -; RV32I-NEXT: bnez a0, .LBB115_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 14(sp) +; RV32I-NEXT: bnez a1, .LBB115_4 ; RV32I-NEXT: .LBB115_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 16 -; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s2, a0, .LBB115_1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s2, a1, .LBB115_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB115_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB115_1 ; RV32I-NEXT: .LBB115_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -16688,34 +16688,34 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded 
Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s2, a0, 48 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: srai s2, a1, 48 ; RV64I-NEXT: j .LBB115_2 ; RV64I-NEXT: .LBB115_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB115_2 Depth=1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: bnez a0, .LBB115_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 14(sp) +; RV64I-NEXT: bnez a1, .LBB115_4 ; RV64I-NEXT: .LBB115_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 -; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a0, .LBB115_1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a1, .LBB115_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB115_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB115_1 ; RV64I-NEXT: .LBB115_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -16806,34 +16806,34 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s2, a0, 16 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: srai s2, a1, 16 ; RV32I-NEXT: j .LBB116_2 ; RV32I-NEXT: .LBB116_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB116_2 Depth=1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: sh a0, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) -; RV32I-NEXT: bnez a0, .LBB116_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 14(sp) +; RV32I-NEXT: bnez a1, .LBB116_4 ; RV32I-NEXT: .LBB116_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 16 -; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s2, a0, .LBB116_1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s2, a1, .LBB116_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB116_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB116_1 ; RV32I-NEXT: .LBB116_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -16910,34 +16910,34 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded 
Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s2, a0, 48 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: srai s2, a1, 48 ; RV64I-NEXT: j .LBB116_2 ; RV64I-NEXT: .LBB116_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB116_2 Depth=1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: bnez a0, .LBB116_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 14(sp) +; RV64I-NEXT: bnez a1, .LBB116_4 ; RV64I-NEXT: .LBB116_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 -; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a0, .LBB116_1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a1, .LBB116_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB116_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB116_1 ; RV64I-NEXT: .LBB116_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -17090,34 +17090,34 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s2, a0, 16 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: srai s2, a1, 16 ; RV32I-NEXT: j .LBB117_2 ; RV32I-NEXT: .LBB117_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB117_2 Depth=1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: sh a0, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) -; RV32I-NEXT: bnez a0, .LBB117_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 14(sp) +; RV32I-NEXT: bnez a1, .LBB117_4 ; RV32I-NEXT: .LBB117_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 16 -; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s2, a0, .LBB117_1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s2, a1, .LBB117_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB117_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB117_1 ; RV32I-NEXT: .LBB117_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -17194,34 +17194,34 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu 
a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s2, a0, 48 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: srai s2, a1, 48 ; RV64I-NEXT: j .LBB117_2 ; RV64I-NEXT: .LBB117_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB117_2 Depth=1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: bnez a0, .LBB117_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 14(sp) +; RV64I-NEXT: bnez a1, .LBB117_4 ; RV64I-NEXT: .LBB117_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 -; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a0, .LBB117_1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a1, .LBB117_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB117_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB117_1 ; RV64I-NEXT: .LBB117_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -17374,34 +17374,34 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s2, a0, 16 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: srai s2, a1, 16 ; RV32I-NEXT: j .LBB118_2 ; RV32I-NEXT: .LBB118_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB118_2 Depth=1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: sh a0, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) -; RV32I-NEXT: bnez a0, .LBB118_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 14(sp) +; RV32I-NEXT: bnez a1, .LBB118_4 ; RV32I-NEXT: .LBB118_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 16 -; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s2, a0, .LBB118_1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s2, a1, .LBB118_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB118_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB118_1 ; RV32I-NEXT: .LBB118_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -17478,34 +17478,34 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: 
slli a0, a1, 48 -; RV64I-NEXT: srai s2, a0, 48 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: srai s2, a1, 48 ; RV64I-NEXT: j .LBB118_2 ; RV64I-NEXT: .LBB118_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB118_2 Depth=1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: bnez a0, .LBB118_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 14(sp) +; RV64I-NEXT: bnez a1, .LBB118_4 ; RV64I-NEXT: .LBB118_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 -; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a0, .LBB118_1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a1, .LBB118_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB118_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB118_1 ; RV64I-NEXT: .LBB118_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -17658,34 +17658,34 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 16 -; RV32I-NEXT: srai s2, a0, 16 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a0, 0(a0) +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: srai s2, a1, 16 ; RV32I-NEXT: j .LBB119_2 ; RV32I-NEXT: .LBB119_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB119_2 Depth=1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: sh a0, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) -; RV32I-NEXT: bnez a0, .LBB119_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 14(sp) +; RV32I-NEXT: bnez a1, .LBB119_4 ; RV32I-NEXT: .LBB119_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 16 -; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s2, a0, .LBB119_1 +; RV32I-NEXT: slli a1, a0, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s2, a1, .LBB119_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB119_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB119_1 ; RV32I-NEXT: .LBB119_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -17731,34 +17731,34 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 48 -; RV64I-NEXT: srai s2, a0, 48 +; 
RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a0, 0(a0) +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: srai s2, a1, 48 ; RV64I-NEXT: j .LBB119_2 ; RV64I-NEXT: .LBB119_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB119_2 Depth=1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: bnez a0, .LBB119_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 14(sp) +; RV64I-NEXT: bnez a1, .LBB119_4 ; RV64I-NEXT: .LBB119_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 -; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a0, .LBB119_1 +; RV64I-NEXT: slli a1, a0, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a1, .LBB119_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB119_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB119_1 ; RV64I-NEXT: .LBB119_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -17852,32 +17852,32 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB120_2 ; RV32I-NEXT: .LBB120_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB120_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) -; RV32I-NEXT: bnez a0, .LBB120_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 10(sp) +; RV32I-NEXT: bnez a1, .LBB120_4 ; RV32I-NEXT: .LBB120_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s3, a0, .LBB120_1 +; RV32I-NEXT: and a1, a0, s2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s3, a1, .LBB120_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB120_2 Depth=1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB120_1 ; RV32I-NEXT: .LBB120_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -17921,32 +17921,32 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addiw s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB120_2 ; RV64I-NEXT: .LBB120_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB120_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 
6(sp) -; RV64I-NEXT: bnez a0, .LBB120_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 6(sp) +; RV64I-NEXT: bnez a1, .LBB120_4 ; RV64I-NEXT: .LBB120_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s3, a0, .LBB120_1 +; RV64I-NEXT: and a1, a0, s2 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s3, a1, .LBB120_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB120_2 Depth=1 ; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB120_1 ; RV64I-NEXT: .LBB120_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -18029,32 +18029,32 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB121_2 ; RV32I-NEXT: .LBB121_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB121_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) -; RV32I-NEXT: bnez a0, .LBB121_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 10(sp) +; RV32I-NEXT: bnez a1, .LBB121_4 ; RV32I-NEXT: .LBB121_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s3, a0, .LBB121_1 +; RV32I-NEXT: and a1, a0, s2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s3, a1, .LBB121_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB121_2 Depth=1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB121_1 ; RV32I-NEXT: .LBB121_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -18123,32 +18123,32 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addiw s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB121_2 ; RV64I-NEXT: .LBB121_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB121_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) -; RV64I-NEXT: bnez a0, .LBB121_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 6(sp) +; RV64I-NEXT: bnez a1, .LBB121_4 ; RV64I-NEXT: .LBB121_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s3, a0, .LBB121_1 +; RV64I-NEXT: and a1, a0, s2 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s3, a1, .LBB121_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB121_2 Depth=1 ; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB121_1 ; RV64I-NEXT: .LBB121_4: # %atomicrmw.end -; RV64I-NEXT: 
mv a0, a1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -18281,32 +18281,32 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB122_2 ; RV32I-NEXT: .LBB122_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB122_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 3 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) -; RV32I-NEXT: bnez a0, .LBB122_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 10(sp) +; RV32I-NEXT: bnez a1, .LBB122_4 ; RV32I-NEXT: .LBB122_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s3, a0, .LBB122_1 +; RV32I-NEXT: and a1, a0, s2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s3, a1, .LBB122_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB122_2 Depth=1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB122_1 ; RV32I-NEXT: .LBB122_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -18375,32 +18375,32 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addiw s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB122_2 ; RV64I-NEXT: .LBB122_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB122_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 3 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) -; RV64I-NEXT: bnez a0, .LBB122_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 6(sp) +; RV64I-NEXT: bnez a1, .LBB122_4 ; RV64I-NEXT: .LBB122_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s3, a0, .LBB122_1 +; RV64I-NEXT: and a1, a0, s2 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s3, a1, .LBB122_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB122_2 Depth=1 ; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB122_1 ; RV64I-NEXT: .LBB122_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -18533,32 +18533,32 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB123_2 ; RV32I-NEXT: .LBB123_1: # %atomicrmw.start ; 
RV32I-NEXT: # in Loop: Header=BB123_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) -; RV32I-NEXT: bnez a0, .LBB123_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 10(sp) +; RV32I-NEXT: bnez a1, .LBB123_4 ; RV32I-NEXT: .LBB123_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s3, a0, .LBB123_1 +; RV32I-NEXT: and a1, a0, s2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s3, a1, .LBB123_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB123_2 Depth=1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB123_1 ; RV32I-NEXT: .LBB123_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -18627,32 +18627,32 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addiw s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB123_2 ; RV64I-NEXT: .LBB123_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB123_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) -; RV64I-NEXT: bnez a0, .LBB123_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 6(sp) +; RV64I-NEXT: bnez a1, .LBB123_4 ; RV64I-NEXT: .LBB123_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s3, a0, .LBB123_1 +; RV64I-NEXT: and a1, a0, s2 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s3, a1, .LBB123_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB123_2 Depth=1 ; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB123_1 ; RV64I-NEXT: .LBB123_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -18785,32 +18785,32 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB124_2 ; RV32I-NEXT: .LBB124_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB124_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) -; RV32I-NEXT: bnez a0, .LBB124_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 10(sp) +; RV32I-NEXT: bnez a1, .LBB124_4 ; RV32I-NEXT: .LBB124_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu s3, a0, .LBB124_1 +; RV32I-NEXT: 
and a1, a0, s2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s3, a1, .LBB124_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB124_2 Depth=1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB124_1 ; RV32I-NEXT: .LBB124_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -18854,32 +18854,32 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addiw s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB124_2 ; RV64I-NEXT: .LBB124_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB124_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) -; RV64I-NEXT: bnez a0, .LBB124_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 6(sp) +; RV64I-NEXT: bnez a1, .LBB124_4 ; RV64I-NEXT: .LBB124_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu s3, a0, .LBB124_1 +; RV64I-NEXT: and a1, a0, s2 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s3, a1, .LBB124_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB124_2 Depth=1 ; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB124_1 ; RV64I-NEXT: .LBB124_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -18962,32 +18962,32 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB125_2 ; RV32I-NEXT: .LBB125_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB125_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) -; RV32I-NEXT: bnez a0, .LBB125_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 10(sp) +; RV32I-NEXT: bnez a1, .LBB125_4 ; RV32I-NEXT: .LBB125_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s3, a0, .LBB125_1 +; RV32I-NEXT: and a1, a0, s2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s3, a1, .LBB125_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB125_2 Depth=1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB125_1 ; RV32I-NEXT: .LBB125_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -19031,32 +19031,32 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, 
a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addiw s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB125_2 ; RV64I-NEXT: .LBB125_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB125_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) -; RV64I-NEXT: bnez a0, .LBB125_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 6(sp) +; RV64I-NEXT: bnez a1, .LBB125_4 ; RV64I-NEXT: .LBB125_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s3, a0, .LBB125_1 +; RV64I-NEXT: and a1, a0, s2 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s3, a1, .LBB125_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB125_2 Depth=1 ; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB125_1 ; RV64I-NEXT: .LBB125_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -19139,32 +19139,32 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB126_2 ; RV32I-NEXT: .LBB126_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB126_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) -; RV32I-NEXT: bnez a0, .LBB126_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 10(sp) +; RV32I-NEXT: bnez a1, .LBB126_4 ; RV32I-NEXT: .LBB126_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s3, a0, .LBB126_1 +; RV32I-NEXT: and a1, a0, s2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s3, a1, .LBB126_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB126_2 Depth=1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB126_1 ; RV32I-NEXT: .LBB126_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -19233,32 +19233,32 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addiw s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB126_2 ; RV64I-NEXT: .LBB126_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB126_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) -; RV64I-NEXT: bnez a0, .LBB126_4 +; RV64I-NEXT: mv a1, 
a0 +; RV64I-NEXT: lh a0, 6(sp) +; RV64I-NEXT: bnez a1, .LBB126_4 ; RV64I-NEXT: .LBB126_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s3, a0, .LBB126_1 +; RV64I-NEXT: and a1, a0, s2 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s3, a1, .LBB126_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB126_2 Depth=1 ; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB126_1 ; RV64I-NEXT: .LBB126_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -19391,32 +19391,32 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB127_2 ; RV32I-NEXT: .LBB127_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB127_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 3 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) -; RV32I-NEXT: bnez a0, .LBB127_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 10(sp) +; RV32I-NEXT: bnez a1, .LBB127_4 ; RV32I-NEXT: .LBB127_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s3, a0, .LBB127_1 +; RV32I-NEXT: and a1, a0, s2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s3, a1, .LBB127_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB127_2 Depth=1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB127_1 ; RV32I-NEXT: .LBB127_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -19485,32 +19485,32 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addiw s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB127_2 ; RV64I-NEXT: .LBB127_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB127_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 3 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) -; RV64I-NEXT: bnez a0, .LBB127_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 6(sp) +; RV64I-NEXT: bnez a1, .LBB127_4 ; RV64I-NEXT: .LBB127_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s3, a0, .LBB127_1 +; RV64I-NEXT: and a1, a0, s2 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s3, a1, .LBB127_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB127_2 Depth=1 ; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB127_1 ; RV64I-NEXT: .LBB127_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; 
RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -19643,32 +19643,32 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB128_2 ; RV32I-NEXT: .LBB128_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB128_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) -; RV32I-NEXT: bnez a0, .LBB128_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 10(sp) +; RV32I-NEXT: bnez a1, .LBB128_4 ; RV32I-NEXT: .LBB128_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s3, a0, .LBB128_1 +; RV32I-NEXT: and a1, a0, s2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s3, a1, .LBB128_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB128_2 Depth=1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB128_1 ; RV32I-NEXT: .LBB128_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -19737,32 +19737,32 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addiw s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB128_2 ; RV64I-NEXT: .LBB128_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB128_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) -; RV64I-NEXT: bnez a0, .LBB128_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 6(sp) +; RV64I-NEXT: bnez a1, .LBB128_4 ; RV64I-NEXT: .LBB128_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s3, a0, .LBB128_1 +; RV64I-NEXT: and a1, a0, s2 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s3, a1, .LBB128_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB128_2 Depth=1 ; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB128_1 ; RV64I-NEXT: .LBB128_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -19895,32 +19895,32 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB129_2 ; RV32I-NEXT: .LBB129_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB129_2 Depth=1 -; RV32I-NEXT: sh a1, 
10(sp) +; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) -; RV32I-NEXT: bnez a0, .LBB129_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 10(sp) +; RV32I-NEXT: bnez a1, .LBB129_4 ; RV32I-NEXT: .LBB129_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgeu s3, a0, .LBB129_1 +; RV32I-NEXT: and a1, a0, s2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s3, a1, .LBB129_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB129_2 Depth=1 ; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB129_1 ; RV32I-NEXT: .LBB129_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -19964,32 +19964,32 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addiw s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB129_2 ; RV64I-NEXT: .LBB129_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB129_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) -; RV64I-NEXT: bnez a0, .LBB129_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 6(sp) +; RV64I-NEXT: bnez a1, .LBB129_4 ; RV64I-NEXT: .LBB129_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bgeu s3, a0, .LBB129_1 +; RV64I-NEXT: and a1, a0, s2 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s3, a1, .LBB129_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB129_2 Depth=1 ; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB129_1 ; RV64I-NEXT: .LBB129_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -22176,30 +22176,30 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB165_2 ; RV32I-NEXT: .LBB165_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB165_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB165_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB165_4 ; RV32I-NEXT: .LBB165_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a3, .LBB165_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a0, .LBB165_1 ; 
RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB165_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB165_1 ; RV32I-NEXT: .LBB165_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -22218,31 +22218,31 @@ define i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB165_2 ; RV64I-NEXT: .LBB165_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB165_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB165_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB165_4 ; RV64I-NEXT: .LBB165_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a3, .LBB165_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a0, .LBB165_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB165_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB165_1 ; RV64I-NEXT: .LBB165_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -22265,30 +22265,30 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB166_2 ; RV32I-NEXT: .LBB166_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB166_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB166_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB166_4 ; RV32I-NEXT: .LBB166_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a3, .LBB166_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a0, .LBB166_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB166_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB166_1 ; RV32I-NEXT: .LBB166_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -22312,31 +22312,31 @@ define i32 @atomicrmw_max_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: 
sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB166_2 ; RV64I-NEXT: .LBB166_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB166_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB166_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB166_4 ; RV64I-NEXT: .LBB166_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a3, .LBB166_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a0, .LBB166_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB166_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB166_1 ; RV64I-NEXT: .LBB166_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -22364,30 +22364,30 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB167_2 ; RV32I-NEXT: .LBB167_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB167_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB167_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB167_4 ; RV32I-NEXT: .LBB167_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a3, .LBB167_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a0, .LBB167_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB167_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB167_1 ; RV32I-NEXT: .LBB167_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -22411,31 +22411,31 @@ define i32 @atomicrmw_max_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB167_2 ; RV64I-NEXT: .LBB167_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB167_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: 
mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB167_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB167_4 ; RV64I-NEXT: .LBB167_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a3, .LBB167_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a0, .LBB167_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB167_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB167_1 ; RV64I-NEXT: .LBB167_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -22463,30 +22463,30 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB168_2 ; RV32I-NEXT: .LBB168_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB168_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB168_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB168_4 ; RV32I-NEXT: .LBB168_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a3, .LBB168_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a0, .LBB168_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB168_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB168_1 ; RV32I-NEXT: .LBB168_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -22510,31 +22510,31 @@ define i32 @atomicrmw_max_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB168_2 ; RV64I-NEXT: .LBB168_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB168_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB168_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB168_4 ; RV64I-NEXT: .LBB168_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a3, .LBB168_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a0, .LBB168_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB168_2 Depth=1 
-; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB168_1 ; RV64I-NEXT: .LBB168_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -22562,30 +22562,30 @@ define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB169_2 ; RV32I-NEXT: .LBB169_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB169_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB169_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB169_4 ; RV32I-NEXT: .LBB169_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a3, .LBB169_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a0, .LBB169_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB169_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB169_1 ; RV32I-NEXT: .LBB169_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -22609,31 +22609,31 @@ define i32 @atomicrmw_max_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB169_2 ; RV64I-NEXT: .LBB169_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB169_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB169_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB169_4 ; RV64I-NEXT: .LBB169_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a3, .LBB169_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a0, .LBB169_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB169_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB169_1 ; RV64I-NEXT: .LBB169_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -22661,30 +22661,30 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill 
-; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB170_2 ; RV32I-NEXT: .LBB170_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB170_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB170_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB170_4 ; RV32I-NEXT: .LBB170_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a3, .LBB170_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a0, .LBB170_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB170_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB170_1 ; RV32I-NEXT: .LBB170_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -22703,31 +22703,31 @@ define i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB170_2 ; RV64I-NEXT: .LBB170_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB170_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB170_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB170_4 ; RV64I-NEXT: .LBB170_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a3, .LBB170_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a0, .LBB170_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB170_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB170_1 ; RV64I-NEXT: .LBB170_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -22750,30 +22750,30 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB171_2 ; RV32I-NEXT: .LBB171_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB171_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; 
RV32I-NEXT: bnez a0, .LBB171_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB171_4 ; RV32I-NEXT: .LBB171_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a3, .LBB171_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a0, .LBB171_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB171_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB171_1 ; RV32I-NEXT: .LBB171_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -22797,31 +22797,31 @@ define i32 @atomicrmw_min_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB171_2 ; RV64I-NEXT: .LBB171_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB171_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB171_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB171_4 ; RV64I-NEXT: .LBB171_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a3, .LBB171_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a0, .LBB171_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB171_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB171_1 ; RV64I-NEXT: .LBB171_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -22849,30 +22849,30 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB172_2 ; RV32I-NEXT: .LBB172_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB172_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB172_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB172_4 ; RV32I-NEXT: .LBB172_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a3, .LBB172_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a0, .LBB172_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB172_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB172_1 ; RV32I-NEXT: .LBB172_4: # 
%atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -22896,31 +22896,31 @@ define i32 @atomicrmw_min_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB172_2 ; RV64I-NEXT: .LBB172_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB172_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB172_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB172_4 ; RV64I-NEXT: .LBB172_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a3, .LBB172_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a0, .LBB172_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB172_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB172_1 ; RV64I-NEXT: .LBB172_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -22948,30 +22948,30 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB173_2 ; RV32I-NEXT: .LBB173_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB173_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB173_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB173_4 ; RV32I-NEXT: .LBB173_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a3, .LBB173_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a0, .LBB173_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB173_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB173_1 ; RV32I-NEXT: .LBB173_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -22995,31 +22995,31 @@ define i32 @atomicrmw_min_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; 
RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB173_2 ; RV64I-NEXT: .LBB173_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB173_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB173_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB173_4 ; RV64I-NEXT: .LBB173_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a3, .LBB173_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a0, .LBB173_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB173_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB173_1 ; RV64I-NEXT: .LBB173_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -23047,30 +23047,30 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB174_2 ; RV32I-NEXT: .LBB174_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB174_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB174_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB174_4 ; RV32I-NEXT: .LBB174_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a3, .LBB174_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a0, .LBB174_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB174_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB174_1 ; RV32I-NEXT: .LBB174_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -23094,31 +23094,31 @@ define i32 @atomicrmw_min_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB174_2 ; RV64I-NEXT: .LBB174_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB174_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB174_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 
12(sp) +; RV64I-NEXT: bnez a1, .LBB174_4 ; RV64I-NEXT: .LBB174_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a3, .LBB174_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a0, .LBB174_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB174_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB174_1 ; RV64I-NEXT: .LBB174_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -23146,30 +23146,30 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB175_2 ; RV32I-NEXT: .LBB175_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB175_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB175_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB175_4 ; RV32I-NEXT: .LBB175_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a3, .LBB175_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s0, a0, .LBB175_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB175_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB175_1 ; RV32I-NEXT: .LBB175_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -23188,31 +23188,31 @@ define i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB175_2 ; RV64I-NEXT: .LBB175_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB175_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB175_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB175_4 ; RV64I-NEXT: .LBB175_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s2, a3, .LBB175_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s2, a0, .LBB175_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB175_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB175_1 ; RV64I-NEXT: .LBB175_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte 
Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -23235,30 +23235,30 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB176_2 ; RV32I-NEXT: .LBB176_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB176_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB176_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB176_4 ; RV32I-NEXT: .LBB176_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a3, .LBB176_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s0, a0, .LBB176_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB176_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB176_1 ; RV32I-NEXT: .LBB176_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -23282,31 +23282,31 @@ define i32 @atomicrmw_umax_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB176_2 ; RV64I-NEXT: .LBB176_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB176_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB176_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB176_4 ; RV64I-NEXT: .LBB176_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s2, a3, .LBB176_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s2, a0, .LBB176_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB176_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB176_1 ; RV64I-NEXT: .LBB176_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -23334,30 +23334,30 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j 
.LBB177_2 ; RV32I-NEXT: .LBB177_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB177_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB177_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB177_4 ; RV32I-NEXT: .LBB177_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a3, .LBB177_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s0, a0, .LBB177_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB177_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB177_1 ; RV32I-NEXT: .LBB177_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -23381,31 +23381,31 @@ define i32 @atomicrmw_umax_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB177_2 ; RV64I-NEXT: .LBB177_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB177_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB177_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB177_4 ; RV64I-NEXT: .LBB177_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s2, a3, .LBB177_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s2, a0, .LBB177_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB177_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB177_1 ; RV64I-NEXT: .LBB177_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -23433,30 +23433,30 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB178_2 ; RV32I-NEXT: .LBB178_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB178_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB178_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB178_4 ; RV32I-NEXT: .LBB178_2: # %atomicrmw.start ; RV32I-NEXT: 
# =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a3, .LBB178_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s0, a0, .LBB178_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB178_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB178_1 ; RV32I-NEXT: .LBB178_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -23480,31 +23480,31 @@ define i32 @atomicrmw_umax_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB178_2 ; RV64I-NEXT: .LBB178_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB178_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB178_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB178_4 ; RV64I-NEXT: .LBB178_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s2, a3, .LBB178_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s2, a0, .LBB178_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB178_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB178_1 ; RV64I-NEXT: .LBB178_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -23532,30 +23532,30 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB179_2 ; RV32I-NEXT: .LBB179_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB179_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB179_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB179_4 ; RV32I-NEXT: .LBB179_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a3, .LBB179_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s0, a0, .LBB179_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB179_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB179_1 ; RV32I-NEXT: .LBB179_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte 
Folded Reload @@ -23579,31 +23579,31 @@ define i32 @atomicrmw_umax_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB179_2 ; RV64I-NEXT: .LBB179_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB179_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB179_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB179_4 ; RV64I-NEXT: .LBB179_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s2, a3, .LBB179_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s2, a0, .LBB179_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB179_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB179_1 ; RV64I-NEXT: .LBB179_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -23631,30 +23631,30 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB180_2 ; RV32I-NEXT: .LBB180_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB180_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB180_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB180_4 ; RV32I-NEXT: .LBB180_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a3, .LBB180_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s0, a0, .LBB180_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB180_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB180_1 ; RV32I-NEXT: .LBB180_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -23673,31 +23673,31 @@ define i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB180_2 ; RV64I-NEXT: .LBB180_1: # %atomicrmw.start ; RV64I-NEXT: # in 
Loop: Header=BB180_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB180_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB180_4 ; RV64I-NEXT: .LBB180_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s2, a3, .LBB180_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s2, a0, .LBB180_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB180_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB180_1 ; RV64I-NEXT: .LBB180_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -23720,30 +23720,30 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB181_2 ; RV32I-NEXT: .LBB181_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB181_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 2 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB181_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB181_4 ; RV32I-NEXT: .LBB181_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a3, .LBB181_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s0, a0, .LBB181_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB181_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB181_1 ; RV32I-NEXT: .LBB181_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -23767,31 +23767,31 @@ define i32 @atomicrmw_umin_i32_acquire(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB181_2 ; RV64I-NEXT: .LBB181_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB181_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB181_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB181_4 ; RV64I-NEXT: .LBB181_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; 
RV64I-NEXT: bgeu s2, a3, .LBB181_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s2, a0, .LBB181_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB181_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB181_1 ; RV64I-NEXT: .LBB181_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -23819,30 +23819,30 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB182_2 ; RV32I-NEXT: .LBB182_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB182_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 3 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB182_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB182_4 ; RV32I-NEXT: .LBB182_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a3, .LBB182_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s0, a0, .LBB182_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB182_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB182_1 ; RV32I-NEXT: .LBB182_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -23866,31 +23866,31 @@ define i32 @atomicrmw_umin_i32_release(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB182_2 ; RV64I-NEXT: .LBB182_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB182_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB182_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB182_4 ; RV64I-NEXT: .LBB182_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s2, a3, .LBB182_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s2, a0, .LBB182_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB182_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB182_1 ; RV64I-NEXT: .LBB182_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -23918,30 +23918,30 @@ define i32 
@atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB183_2 ; RV32I-NEXT: .LBB183_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB183_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 4 ; RV32I-NEXT: li a4, 2 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB183_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB183_4 ; RV32I-NEXT: .LBB183_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a3, .LBB183_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s0, a0, .LBB183_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB183_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB183_1 ; RV32I-NEXT: .LBB183_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -23965,31 +23965,31 @@ define i32 @atomicrmw_umin_i32_acq_rel(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB183_2 ; RV64I-NEXT: .LBB183_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB183_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB183_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB183_4 ; RV64I-NEXT: .LBB183_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s2, a3, .LBB183_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s2, a0, .LBB183_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB183_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB183_1 ; RV64I-NEXT: .LBB183_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -24017,30 +24017,30 @@ define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB184_2 ; RV32I-NEXT: .LBB184_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB184_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) 
; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB184_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB184_4 ; RV32I-NEXT: .LBB184_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a3, .LBB184_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s0, a0, .LBB184_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB184_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB184_1 ; RV32I-NEXT: .LBB184_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -24064,31 +24064,31 @@ define i32 @atomicrmw_umin_i32_seq_cst(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB184_2 ; RV64I-NEXT: .LBB184_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB184_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB184_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB184_4 ; RV64I-NEXT: .LBB184_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s2, a3, .LBB184_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s2, a0, .LBB184_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB184_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB184_1 ; RV64I-NEXT: .LBB184_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -26073,45 +26073,44 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB220_2 ; RV32I-NEXT: .LBB220_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB220_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB220_7 ; RV32I-NEXT: .LBB220_2: # %atomicrmw.start ; RV32I-NEXT: # 
=>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB220_4 +; RV32I-NEXT: beq a1, s0, .LBB220_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a4, s0, a1 ; RV32I-NEXT: j .LBB220_5 ; RV32I-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB220_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: bnez a0, .LBB220_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a4, .LBB220_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB220_1 ; RV32I-NEXT: .LBB220_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26127,45 +26126,44 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB220_2 ; RV32IA-NEXT: .LBB220_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB220_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB220_7 ; RV32IA-NEXT: .LBB220_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB220_4 +; RV32IA-NEXT: beq a1, s0, .LBB220_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a4, s0, a1 ; RV32IA-NEXT: j .LBB220_5 ; RV32IA-NEXT: .LBB220_4: # in Loop: Header=BB220_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB220_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: bnez a0, .LBB220_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a4, .LBB220_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB220_1 ; RV32IA-NEXT: .LBB220_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26179,30 +26177,30 @@ define i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded 
Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB220_2 ; RV64I-NEXT: .LBB220_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB220_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB220_4 ; RV64I-NEXT: .LBB220_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB220_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a0, .LBB220_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB220_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB220_1 ; RV64I-NEXT: .LBB220_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -26226,45 +26224,44 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB221_2 ; RV32I-NEXT: .LBB221_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB221_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB221_7 ; RV32I-NEXT: .LBB221_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB221_4 +; RV32I-NEXT: beq a1, s0, .LBB221_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a4, s0, a1 ; RV32I-NEXT: j .LBB221_5 ; RV32I-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB221_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: bnez a0, .LBB221_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a4, .LBB221_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB221_1 ; RV32I-NEXT: .LBB221_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26280,45 +26277,44 @@ define i64 
@atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB221_2 ; RV32IA-NEXT: .LBB221_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB221_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB221_7 ; RV32IA-NEXT: .LBB221_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB221_4 +; RV32IA-NEXT: beq a1, s0, .LBB221_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a4, s0, a1 ; RV32IA-NEXT: j .LBB221_5 ; RV32IA-NEXT: .LBB221_4: # in Loop: Header=BB221_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB221_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: bnez a0, .LBB221_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a4, .LBB221_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB221_1 ; RV32IA-NEXT: .LBB221_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26332,30 +26328,30 @@ define i64 @atomicrmw_max_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB221_2 ; RV64I-NEXT: .LBB221_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB221_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB221_4 ; RV64I-NEXT: .LBB221_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB221_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a0, .LBB221_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB221_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB221_1 ; RV64I-NEXT: .LBB221_4: # %atomicrmw.end -; RV64I-NEXT: mv 
a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -26384,45 +26380,44 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB222_2 ; RV32I-NEXT: .LBB222_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 3 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB222_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB222_7 ; RV32I-NEXT: .LBB222_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB222_4 +; RV32I-NEXT: beq a1, s0, .LBB222_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a4, s0, a1 ; RV32I-NEXT: j .LBB222_5 ; RV32I-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB222_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: bnez a0, .LBB222_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a4, .LBB222_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB222_1 ; RV32I-NEXT: .LBB222_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26438,45 +26433,44 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB222_2 ; RV32IA-NEXT: .LBB222_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 3 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB222_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB222_7 ; RV32IA-NEXT: .LBB222_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, 
.LBB222_4 +; RV32IA-NEXT: beq a1, s0, .LBB222_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a4, s0, a1 ; RV32IA-NEXT: j .LBB222_5 ; RV32IA-NEXT: .LBB222_4: # in Loop: Header=BB222_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB222_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: bnez a0, .LBB222_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a4, .LBB222_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB222_1 ; RV32IA-NEXT: .LBB222_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26490,30 +26484,30 @@ define i64 @atomicrmw_max_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB222_2 ; RV64I-NEXT: .LBB222_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB222_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB222_4 ; RV64I-NEXT: .LBB222_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB222_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a0, .LBB222_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB222_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB222_1 ; RV64I-NEXT: .LBB222_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -26542,45 +26536,44 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB223_2 ; RV32I-NEXT: .LBB223_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: li a5, 2 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB223_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 
12(sp) +; RV32I-NEXT: bnez a2, .LBB223_7 ; RV32I-NEXT: .LBB223_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB223_4 +; RV32I-NEXT: beq a1, s0, .LBB223_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a4, s0, a1 ; RV32I-NEXT: j .LBB223_5 ; RV32I-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB223_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: bnez a0, .LBB223_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a4, .LBB223_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB223_1 ; RV32I-NEXT: .LBB223_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26596,45 +26589,44 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB223_2 ; RV32IA-NEXT: .LBB223_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB223_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB223_7 ; RV32IA-NEXT: .LBB223_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB223_4 +; RV32IA-NEXT: beq a1, s0, .LBB223_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a4, s0, a1 ; RV32IA-NEXT: j .LBB223_5 ; RV32IA-NEXT: .LBB223_4: # in Loop: Header=BB223_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB223_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: bnez a0, .LBB223_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a4, .LBB223_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB223_1 ; RV32IA-NEXT: .LBB223_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26648,30 +26640,30 @@ define i64 @atomicrmw_max_i64_acq_rel(ptr %a, i64 %b) nounwind { ; 
RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB223_2 ; RV64I-NEXT: .LBB223_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB223_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB223_4 ; RV64I-NEXT: .LBB223_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB223_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a0, .LBB223_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB223_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB223_1 ; RV64I-NEXT: .LBB223_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -26700,45 +26692,44 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB224_2 ; RV32I-NEXT: .LBB224_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB224_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB224_7 ; RV32I-NEXT: .LBB224_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB224_4 +; RV32I-NEXT: beq a1, s0, .LBB224_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a4, s0, a1 ; RV32I-NEXT: j .LBB224_5 ; RV32I-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB224_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: bnez a0, .LBB224_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a4, .LBB224_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB224_1 ; RV32I-NEXT: .LBB224_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; 
RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26754,45 +26745,44 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB224_2 ; RV32IA-NEXT: .LBB224_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB224_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB224_7 ; RV32IA-NEXT: .LBB224_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB224_4 +; RV32IA-NEXT: beq a1, s0, .LBB224_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a4, s0, a1 ; RV32IA-NEXT: j .LBB224_5 ; RV32IA-NEXT: .LBB224_4: # in Loop: Header=BB224_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB224_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: bnez a0, .LBB224_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a4, .LBB224_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB224_1 ; RV32IA-NEXT: .LBB224_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26806,30 +26796,30 @@ define i64 @atomicrmw_max_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB224_2 ; RV64I-NEXT: .LBB224_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB224_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB224_4 ; RV64I-NEXT: .LBB224_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB224_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a0, .LBB224_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB224_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 
; RV64I-NEXT: j .LBB224_1 ; RV64I-NEXT: .LBB224_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -26858,45 +26848,44 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB225_2 ; RV32I-NEXT: .LBB225_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB225_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB225_7 ; RV32I-NEXT: .LBB225_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB225_4 +; RV32I-NEXT: beq a1, s0, .LBB225_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a4, s0, a1 ; RV32I-NEXT: j .LBB225_5 ; RV32I-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB225_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a0, .LBB225_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: beqz a4, .LBB225_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB225_1 ; RV32I-NEXT: .LBB225_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26912,45 +26901,44 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB225_2 ; RV32IA-NEXT: .LBB225_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB225_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB225_7 ; RV32IA-NEXT: .LBB225_2: # 
%atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB225_4 +; RV32IA-NEXT: beq a1, s0, .LBB225_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a4, s0, a1 ; RV32IA-NEXT: j .LBB225_5 ; RV32IA-NEXT: .LBB225_4: # in Loop: Header=BB225_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB225_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: beqz a0, .LBB225_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: beqz a4, .LBB225_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB225_1 ; RV32IA-NEXT: .LBB225_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -26964,30 +26952,30 @@ define i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB225_2 ; RV64I-NEXT: .LBB225_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB225_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB225_4 ; RV64I-NEXT: .LBB225_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB225_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a0, .LBB225_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB225_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB225_1 ; RV64I-NEXT: .LBB225_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -27011,45 +26999,44 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB226_2 ; RV32I-NEXT: .LBB226_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; 
RV32I-NEXT: bnez a0, .LBB226_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB226_7 ; RV32I-NEXT: .LBB226_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB226_4 +; RV32I-NEXT: beq a1, s0, .LBB226_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a4, s0, a1 ; RV32I-NEXT: j .LBB226_5 ; RV32I-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB226_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a0, .LBB226_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: beqz a4, .LBB226_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB226_1 ; RV32I-NEXT: .LBB226_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27065,45 +27052,44 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB226_2 ; RV32IA-NEXT: .LBB226_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB226_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB226_7 ; RV32IA-NEXT: .LBB226_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB226_4 +; RV32IA-NEXT: beq a1, s0, .LBB226_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a4, s0, a1 ; RV32IA-NEXT: j .LBB226_5 ; RV32IA-NEXT: .LBB226_4: # in Loop: Header=BB226_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB226_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: beqz a0, .LBB226_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: beqz a4, .LBB226_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB226_1 ; RV32IA-NEXT: .LBB226_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte 
Folded Reload @@ -27117,30 +27103,30 @@ define i64 @atomicrmw_min_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB226_2 ; RV64I-NEXT: .LBB226_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB226_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB226_4 ; RV64I-NEXT: .LBB226_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB226_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a0, .LBB226_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB226_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB226_1 ; RV64I-NEXT: .LBB226_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -27169,45 +27155,44 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB227_2 ; RV32I-NEXT: .LBB227_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 3 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB227_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB227_7 ; RV32I-NEXT: .LBB227_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB227_4 +; RV32I-NEXT: beq a1, s0, .LBB227_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a4, s0, a1 ; RV32I-NEXT: j .LBB227_5 ; RV32I-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB227_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a0, .LBB227_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: beqz a4, .LBB227_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB227_1 ; RV32I-NEXT: .LBB227_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; 
RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27223,45 +27208,44 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB227_2 ; RV32IA-NEXT: .LBB227_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 3 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB227_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB227_7 ; RV32IA-NEXT: .LBB227_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB227_4 +; RV32IA-NEXT: beq a1, s0, .LBB227_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a4, s0, a1 ; RV32IA-NEXT: j .LBB227_5 ; RV32IA-NEXT: .LBB227_4: # in Loop: Header=BB227_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB227_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: beqz a0, .LBB227_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: beqz a4, .LBB227_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB227_1 ; RV32IA-NEXT: .LBB227_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27275,30 +27259,30 @@ define i64 @atomicrmw_min_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB227_2 ; RV64I-NEXT: .LBB227_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB227_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB227_4 ; RV64I-NEXT: .LBB227_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB227_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a0, .LBB227_1 ; RV64I-NEXT: # %bb.3: # 
%atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB227_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB227_1 ; RV64I-NEXT: .LBB227_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -27327,45 +27311,44 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB228_2 ; RV32I-NEXT: .LBB228_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: li a5, 2 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB228_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB228_7 ; RV32I-NEXT: .LBB228_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB228_4 +; RV32I-NEXT: beq a1, s0, .LBB228_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a4, s0, a1 ; RV32I-NEXT: j .LBB228_5 ; RV32I-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB228_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a0, .LBB228_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: beqz a4, .LBB228_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB228_1 ; RV32I-NEXT: .LBB228_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27381,45 +27364,44 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB228_2 ; RV32IA-NEXT: .LBB228_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB228_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw 
a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB228_7 ; RV32IA-NEXT: .LBB228_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB228_4 +; RV32IA-NEXT: beq a1, s0, .LBB228_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a4, s0, a1 ; RV32IA-NEXT: j .LBB228_5 ; RV32IA-NEXT: .LBB228_4: # in Loop: Header=BB228_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB228_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: beqz a0, .LBB228_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: beqz a4, .LBB228_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB228_1 ; RV32IA-NEXT: .LBB228_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27433,30 +27415,30 @@ define i64 @atomicrmw_min_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB228_2 ; RV64I-NEXT: .LBB228_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB228_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB228_4 ; RV64I-NEXT: .LBB228_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB228_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a0, .LBB228_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB228_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB228_1 ; RV64I-NEXT: .LBB228_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -27485,45 +27467,44 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB229_2 ; RV32I-NEXT: .LBB229_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: 
call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB229_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB229_7 ; RV32I-NEXT: .LBB229_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB229_4 +; RV32I-NEXT: beq a1, s0, .LBB229_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a4, s0, a1 ; RV32I-NEXT: j .LBB229_5 ; RV32I-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB229_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a0, .LBB229_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: beqz a4, .LBB229_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB229_1 ; RV32I-NEXT: .LBB229_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27539,45 +27520,44 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB229_2 ; RV32IA-NEXT: .LBB229_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB229_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB229_7 ; RV32IA-NEXT: .LBB229_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB229_4 +; RV32IA-NEXT: beq a1, s0, .LBB229_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a4, s0, a1 ; RV32IA-NEXT: j .LBB229_5 ; RV32IA-NEXT: .LBB229_4: # in Loop: Header=BB229_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB229_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: beqz a0, .LBB229_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: beqz a4, .LBB229_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB229_1 ; RV32IA-NEXT: .LBB229_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded 
Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27591,30 +27571,30 @@ define i64 @atomicrmw_min_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB229_2 ; RV64I-NEXT: .LBB229_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB229_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB229_4 ; RV64I-NEXT: .LBB229_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB229_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a0, .LBB229_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB229_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB229_1 ; RV64I-NEXT: .LBB229_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -27643,45 +27623,44 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB230_2 ; RV32I-NEXT: .LBB230_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB230_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB230_7 ; RV32I-NEXT: .LBB230_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB230_4 +; RV32I-NEXT: beq a1, s0, .LBB230_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a4, s0, a1 ; RV32I-NEXT: j .LBB230_5 ; RV32I-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB230_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: bnez a0, .LBB230_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a4, .LBB230_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j 
.LBB230_1 ; RV32I-NEXT: .LBB230_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27697,45 +27676,44 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB230_2 ; RV32IA-NEXT: .LBB230_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB230_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB230_7 ; RV32IA-NEXT: .LBB230_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB230_4 +; RV32IA-NEXT: beq a1, s0, .LBB230_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a4, s0, a1 ; RV32IA-NEXT: j .LBB230_5 ; RV32IA-NEXT: .LBB230_4: # in Loop: Header=BB230_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB230_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: bnez a0, .LBB230_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a4, .LBB230_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB230_1 ; RV32IA-NEXT: .LBB230_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27749,30 +27727,30 @@ define i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB230_2 ; RV64I-NEXT: .LBB230_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB230_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB230_4 ; RV64I-NEXT: .LBB230_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, 
a3, .LBB230_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a0, .LBB230_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB230_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB230_1 ; RV64I-NEXT: .LBB230_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -27796,45 +27774,44 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB231_2 ; RV32I-NEXT: .LBB231_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB231_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB231_7 ; RV32I-NEXT: .LBB231_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB231_4 +; RV32I-NEXT: beq a1, s0, .LBB231_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a4, s0, a1 ; RV32I-NEXT: j .LBB231_5 ; RV32I-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB231_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: bnez a0, .LBB231_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a4, .LBB231_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB231_1 ; RV32I-NEXT: .LBB231_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27850,45 +27827,44 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB231_2 ; RV32IA-NEXT: .LBB231_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; 
RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB231_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB231_7 ; RV32IA-NEXT: .LBB231_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB231_4 +; RV32IA-NEXT: beq a1, s0, .LBB231_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a4, s0, a1 ; RV32IA-NEXT: j .LBB231_5 ; RV32IA-NEXT: .LBB231_4: # in Loop: Header=BB231_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB231_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: bnez a0, .LBB231_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a4, .LBB231_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB231_1 ; RV32IA-NEXT: .LBB231_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -27902,30 +27878,30 @@ define i64 @atomicrmw_umax_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB231_2 ; RV64I-NEXT: .LBB231_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB231_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB231_4 ; RV64I-NEXT: .LBB231_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB231_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a0, .LBB231_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB231_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB231_1 ; RV64I-NEXT: .LBB231_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -27954,45 +27930,44 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB232_2 ; RV32I-NEXT: .LBB232_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 
8 ; RV32I-NEXT: li a4, 3 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB232_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB232_7 ; RV32I-NEXT: .LBB232_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB232_4 +; RV32I-NEXT: beq a1, s0, .LBB232_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a4, s0, a1 ; RV32I-NEXT: j .LBB232_5 ; RV32I-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB232_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: bnez a0, .LBB232_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a4, .LBB232_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB232_1 ; RV32I-NEXT: .LBB232_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28008,45 +27983,44 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB232_2 ; RV32IA-NEXT: .LBB232_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 3 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB232_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB232_7 ; RV32IA-NEXT: .LBB232_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB232_4 +; RV32IA-NEXT: beq a1, s0, .LBB232_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a4, s0, a1 ; RV32IA-NEXT: j .LBB232_5 ; RV32IA-NEXT: .LBB232_4: # in Loop: Header=BB232_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB232_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: bnez a0, .LBB232_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a4, .LBB232_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB232_1 ; RV32IA-NEXT: .LBB232_7: # 
%atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28060,30 +28034,30 @@ define i64 @atomicrmw_umax_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB232_2 ; RV64I-NEXT: .LBB232_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB232_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB232_4 ; RV64I-NEXT: .LBB232_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB232_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a0, .LBB232_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB232_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB232_1 ; RV64I-NEXT: .LBB232_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -28112,45 +28086,44 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB233_2 ; RV32I-NEXT: .LBB233_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: li a5, 2 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB233_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB233_7 ; RV32I-NEXT: .LBB233_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB233_4 +; RV32I-NEXT: beq a1, s0, .LBB233_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a4, s0, a1 ; RV32I-NEXT: j .LBB233_5 ; RV32I-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB233_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: bnez a0, .LBB233_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a4, .LBB233_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: 
Header=BB233_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB233_1 ; RV32I-NEXT: .LBB233_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28166,45 +28139,44 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB233_2 ; RV32IA-NEXT: .LBB233_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB233_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB233_7 ; RV32IA-NEXT: .LBB233_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB233_4 +; RV32IA-NEXT: beq a1, s0, .LBB233_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a4, s0, a1 ; RV32IA-NEXT: j .LBB233_5 ; RV32IA-NEXT: .LBB233_4: # in Loop: Header=BB233_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB233_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: bnez a0, .LBB233_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a4, .LBB233_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB233_1 ; RV32IA-NEXT: .LBB233_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28218,30 +28190,30 @@ define i64 @atomicrmw_umax_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB233_2 ; RV64I-NEXT: .LBB233_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB233_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB233_4 ; RV64I-NEXT: .LBB233_2: # 
%atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB233_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a0, .LBB233_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB233_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB233_1 ; RV64I-NEXT: .LBB233_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -28270,45 +28242,44 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB234_2 ; RV32I-NEXT: .LBB234_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB234_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB234_7 ; RV32I-NEXT: .LBB234_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB234_4 +; RV32I-NEXT: beq a1, s0, .LBB234_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a4, s0, a1 ; RV32I-NEXT: j .LBB234_5 ; RV32I-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB234_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: bnez a0, .LBB234_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a4, .LBB234_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB234_1 ; RV32I-NEXT: .LBB234_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28324,45 +28295,44 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB234_2 ; RV32IA-NEXT: .LBB234_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 -; RV32IA-NEXT: mv 
a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB234_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB234_7 ; RV32IA-NEXT: .LBB234_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB234_4 +; RV32IA-NEXT: beq a1, s0, .LBB234_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a4, s0, a1 ; RV32IA-NEXT: j .LBB234_5 ; RV32IA-NEXT: .LBB234_4: # in Loop: Header=BB234_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB234_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: bnez a0, .LBB234_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a4, .LBB234_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB234_1 ; RV32IA-NEXT: .LBB234_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28376,30 +28346,30 @@ define i64 @atomicrmw_umax_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB234_2 ; RV64I-NEXT: .LBB234_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB234_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB234_4 ; RV64I-NEXT: .LBB234_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB234_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a0, .LBB234_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB234_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB234_1 ; RV64I-NEXT: .LBB234_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -28428,45 +28398,44 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB235_2 ; RV32I-NEXT: .LBB235_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32I-NEXT: sw a4, 
8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB235_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB235_7 ; RV32I-NEXT: .LBB235_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB235_4 +; RV32I-NEXT: beq a1, s0, .LBB235_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a4, s0, a1 ; RV32I-NEXT: j .LBB235_5 ; RV32I-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB235_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a0, .LBB235_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: beqz a4, .LBB235_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB235_1 ; RV32I-NEXT: .LBB235_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28482,45 +28451,44 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB235_2 ; RV32IA-NEXT: .LBB235_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB235_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB235_7 ; RV32IA-NEXT: .LBB235_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB235_4 +; RV32IA-NEXT: beq a1, s0, .LBB235_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a4, s0, a1 ; RV32IA-NEXT: j .LBB235_5 ; RV32IA-NEXT: .LBB235_4: # in Loop: Header=BB235_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB235_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: beqz a0, .LBB235_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: beqz a4, .LBB235_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB235_2 Depth=1 -; 
RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB235_1 ; RV32IA-NEXT: .LBB235_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28534,30 +28502,30 @@ define i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB235_2 ; RV64I-NEXT: .LBB235_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB235_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB235_4 ; RV64I-NEXT: .LBB235_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB235_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a0, .LBB235_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB235_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB235_1 ; RV64I-NEXT: .LBB235_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -28581,45 +28549,44 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB236_2 ; RV32I-NEXT: .LBB236_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 2 ; RV32I-NEXT: li a5, 2 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB236_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB236_7 ; RV32I-NEXT: .LBB236_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB236_4 +; RV32I-NEXT: beq a1, s0, .LBB236_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a4, s0, a1 ; RV32I-NEXT: j .LBB236_5 ; RV32I-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB236_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a0, .LBB236_1 +; RV32I-NEXT: mv a2, a0 
+; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: beqz a4, .LBB236_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB236_1 ; RV32I-NEXT: .LBB236_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28635,45 +28602,44 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB236_2 ; RV32IA-NEXT: .LBB236_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 2 ; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB236_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB236_7 ; RV32IA-NEXT: .LBB236_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB236_4 +; RV32IA-NEXT: beq a1, s0, .LBB236_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a4, s0, a1 ; RV32IA-NEXT: j .LBB236_5 ; RV32IA-NEXT: .LBB236_4: # in Loop: Header=BB236_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB236_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: beqz a0, .LBB236_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: beqz a4, .LBB236_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB236_1 ; RV32IA-NEXT: .LBB236_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28687,30 +28653,30 @@ define i64 @atomicrmw_umin_i64_acquire(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB236_2 ; RV64I-NEXT: .LBB236_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 2 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, 
.LBB236_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB236_4 ; RV64I-NEXT: .LBB236_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB236_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a0, .LBB236_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB236_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB236_1 ; RV64I-NEXT: .LBB236_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -28739,45 +28705,44 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB237_2 ; RV32I-NEXT: .LBB237_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 3 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB237_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB237_7 ; RV32I-NEXT: .LBB237_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB237_4 +; RV32I-NEXT: beq a1, s0, .LBB237_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a4, s0, a1 ; RV32I-NEXT: j .LBB237_5 ; RV32I-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB237_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a0, .LBB237_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: beqz a4, .LBB237_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB237_1 ; RV32I-NEXT: .LBB237_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28793,45 +28758,44 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB237_2 ; RV32IA-NEXT: .LBB237_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; 
RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 3 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB237_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB237_7 ; RV32IA-NEXT: .LBB237_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB237_4 +; RV32IA-NEXT: beq a1, s0, .LBB237_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a4, s0, a1 ; RV32IA-NEXT: j .LBB237_5 ; RV32IA-NEXT: .LBB237_4: # in Loop: Header=BB237_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB237_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: beqz a0, .LBB237_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: beqz a4, .LBB237_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB237_1 ; RV32IA-NEXT: .LBB237_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28845,30 +28809,30 @@ define i64 @atomicrmw_umin_i64_release(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB237_2 ; RV64I-NEXT: .LBB237_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 3 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB237_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB237_4 ; RV64I-NEXT: .LBB237_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB237_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a0, .LBB237_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB237_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB237_1 ; RV64I-NEXT: .LBB237_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -28897,45 +28861,44 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j 
.LBB238_2 ; RV32I-NEXT: .LBB238_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 4 ; RV32I-NEXT: li a5, 2 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB238_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB238_7 ; RV32I-NEXT: .LBB238_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB238_4 +; RV32I-NEXT: beq a1, s0, .LBB238_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a4, s0, a1 ; RV32I-NEXT: j .LBB238_5 ; RV32I-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB238_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a0, .LBB238_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: beqz a4, .LBB238_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB238_1 ; RV32I-NEXT: .LBB238_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -28951,45 +28914,44 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB238_2 ; RV32IA-NEXT: .LBB238_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 4 ; RV32IA-NEXT: li a5, 2 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB238_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB238_7 ; RV32IA-NEXT: .LBB238_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB238_4 +; RV32IA-NEXT: beq a1, s0, .LBB238_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a4, s0, a1 ; RV32IA-NEXT: j .LBB238_5 ; RV32IA-NEXT: .LBB238_4: # in Loop: Header=BB238_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB238_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: beqz a0, .LBB238_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: 
beqz a4, .LBB238_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB238_1 ; RV32IA-NEXT: .LBB238_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -29003,30 +28965,30 @@ define i64 @atomicrmw_umin_i64_acq_rel(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB238_2 ; RV64I-NEXT: .LBB238_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 2 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB238_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB238_4 ; RV64I-NEXT: .LBB238_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB238_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a0, .LBB238_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB238_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB238_1 ; RV64I-NEXT: .LBB238_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -29055,45 +29017,44 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB239_2 ; RV32I-NEXT: .LBB239_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB239_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB239_7 ; RV32I-NEXT: .LBB239_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB239_4 +; RV32I-NEXT: beq a1, s0, .LBB239_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a4, s0, a1 ; RV32I-NEXT: j .LBB239_5 ; RV32I-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB239_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB239_2 
Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a0, .LBB239_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: beqz a4, .LBB239_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB239_1 ; RV32I-NEXT: .LBB239_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -29109,45 +29070,44 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB239_2 ; RV32IA-NEXT: .LBB239_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB239_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB239_7 ; RV32IA-NEXT: .LBB239_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB239_4 +; RV32IA-NEXT: beq a1, s0, .LBB239_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a4, s0, a1 ; RV32IA-NEXT: j .LBB239_5 ; RV32IA-NEXT: .LBB239_4: # in Loop: Header=BB239_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB239_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: beqz a0, .LBB239_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: beqz a4, .LBB239_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB239_1 ; RV32IA-NEXT: .LBB239_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -29161,30 +29121,30 @@ define i64 @atomicrmw_umin_i64_seq_cst(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB239_2 ; RV64I-NEXT: .LBB239_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; 
RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB239_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB239_4 ; RV64I-NEXT: .LBB239_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB239_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a0, .LBB239_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB239_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB239_1 ; RV64I-NEXT: .LBB239_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/atomic-signext.ll b/llvm/test/CodeGen/RISCV/atomic-signext.ll index aea7473ceece4..81c47f8701c50 100644 --- a/llvm/test/CodeGen/RISCV/atomic-signext.ll +++ b/llvm/test/CodeGen/RISCV/atomic-signext.ll @@ -586,34 +586,34 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: slli a0, s0, 24 ; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB10_2 ; RV32I-NEXT: .LBB10_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB10_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a1, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) +; RV32I-NEXT: lbu a1, 15(sp) ; RV32I-NEXT: bnez a0, .LBB10_4 ; RV32I-NEXT: .LBB10_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 24 +; RV32I-NEXT: slli a0, a1, 24 ; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a2, a1 ; RV32I-NEXT: blt s2, a0, .LBB10_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB10_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB10_1 ; RV32I-NEXT: .LBB10_4: # %atomicrmw.end -; RV32I-NEXT: slli a0, a3, 24 +; RV32I-NEXT: slli a0, a1, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -660,34 +660,34 @@ define signext i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: slli a0, s0, 56 ; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB10_2 ; RV64I-NEXT: .LBB10_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB10_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a1, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) +; RV64I-NEXT: lbu a1, 
15(sp) ; RV64I-NEXT: bnez a0, .LBB10_4 ; RV64I-NEXT: .LBB10_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 56 +; RV64I-NEXT: slli a0, a1, 56 ; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a2, a1 ; RV64I-NEXT: blt s2, a0, .LBB10_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB10_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB10_1 ; RV64I-NEXT: .LBB10_4: # %atomicrmw.end -; RV64I-NEXT: slli a0, a3, 56 +; RV64I-NEXT: slli a0, a1, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -738,34 +738,34 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 24 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: slli a0, s0, 24 ; RV32I-NEXT: srai s2, a0, 24 ; RV32I-NEXT: j .LBB11_2 ; RV32I-NEXT: .LBB11_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a1, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) +; RV32I-NEXT: lbu a1, 15(sp) ; RV32I-NEXT: bnez a0, .LBB11_4 ; RV32I-NEXT: .LBB11_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 24 +; RV32I-NEXT: slli a0, a1, 24 ; RV32I-NEXT: srai a0, a0, 24 -; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a2, a1 ; RV32I-NEXT: bge s2, a0, .LBB11_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB11_1 ; RV32I-NEXT: .LBB11_4: # %atomicrmw.end -; RV32I-NEXT: slli a0, a3, 24 +; RV32I-NEXT: slli a0, a1, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -812,34 +812,34 @@ define signext i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 56 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: slli a0, s0, 56 ; RV64I-NEXT: srai s2, a0, 56 ; RV64I-NEXT: j .LBB11_2 ; RV64I-NEXT: .LBB11_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a1, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) +; RV64I-NEXT: lbu a1, 15(sp) ; RV64I-NEXT: bnez a0, .LBB11_4 ; RV64I-NEXT: .LBB11_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 56 +; RV64I-NEXT: slli a0, a1, 56 ; RV64I-NEXT: srai a0, a0, 56 -; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a2, a1 ; RV64I-NEXT: bge s2, a0, .LBB11_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; 
RV64I-NEXT: # in Loop: Header=BB11_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB11_1 ; RV64I-NEXT: .LBB11_4: # %atomicrmw.end -; RV64I-NEXT: slli a0, a3, 56 +; RV64I-NEXT: slli a0, a1, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -890,32 +890,32 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: andi s2, a1, 255 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: andi s2, s0, 255 ; RV32I-NEXT: j .LBB12_2 ; RV32I-NEXT: .LBB12_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB12_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a1, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) +; RV32I-NEXT: lbu a1, 15(sp) ; RV32I-NEXT: bnez a0, .LBB12_4 ; RV32I-NEXT: .LBB12_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: andi a0, a1, 255 +; RV32I-NEXT: mv a2, a1 ; RV32I-NEXT: bltu s2, a0, .LBB12_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB12_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB12_1 ; RV32I-NEXT: .LBB12_4: # %atomicrmw.end -; RV32I-NEXT: slli a0, a3, 24 +; RV32I-NEXT: slli a0, a1, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -957,32 +957,32 @@ define signext i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: andi s2, a1, 255 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: andi s2, s0, 255 ; RV64I-NEXT: j .LBB12_2 ; RV64I-NEXT: .LBB12_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB12_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a1, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) +; RV64I-NEXT: lbu a1, 15(sp) ; RV64I-NEXT: bnez a0, .LBB12_4 ; RV64I-NEXT: .LBB12_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: andi a0, a1, 255 +; RV64I-NEXT: mv a2, a1 ; RV64I-NEXT: bltu s2, a0, .LBB12_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB12_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB12_1 ; RV64I-NEXT: .LBB12_4: # %atomicrmw.end -; RV64I-NEXT: slli a0, a3, 56 +; RV64I-NEXT: slli a0, a1, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -1028,32 +1028,32 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; 
RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: andi s2, a1, 255 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: andi s2, s0, 255 ; RV32I-NEXT: j .LBB13_2 ; RV32I-NEXT: .LBB13_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a1, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) +; RV32I-NEXT: lbu a1, 15(sp) ; RV32I-NEXT: bnez a0, .LBB13_4 ; RV32I-NEXT: .LBB13_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: andi a0, a1, 255 +; RV32I-NEXT: mv a2, a1 ; RV32I-NEXT: bgeu s2, a0, .LBB13_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB13_1 ; RV32I-NEXT: .LBB13_4: # %atomicrmw.end -; RV32I-NEXT: slli a0, a3, 24 +; RV32I-NEXT: slli a0, a1, 24 ; RV32I-NEXT: srai a0, a0, 24 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1095,32 +1095,32 @@ define signext i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: andi s2, a1, 255 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: andi s2, s0, 255 ; RV64I-NEXT: j .LBB13_2 ; RV64I-NEXT: .LBB13_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: sb a1, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) +; RV64I-NEXT: lbu a1, 15(sp) ; RV64I-NEXT: bnez a0, .LBB13_4 ; RV64I-NEXT: .LBB13_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: andi a0, a1, 255 +; RV64I-NEXT: mv a2, a1 ; RV64I-NEXT: bgeu s2, a0, .LBB13_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB13_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB13_1 ; RV64I-NEXT: .LBB13_4: # %atomicrmw.end -; RV64I-NEXT: slli a0, a3, 56 +; RV64I-NEXT: slli a0, a1, 56 ; RV64I-NEXT: srai a0, a0, 56 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -1634,34 +1634,34 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: slli a0, s0, 16 ; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB21_2 ; RV32I-NEXT: .LBB21_1: # 
%atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB21_2 Depth=1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: sh a1, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) +; RV32I-NEXT: lh a1, 14(sp) ; RV32I-NEXT: bnez a0, .LBB21_4 ; RV32I-NEXT: .LBB21_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 16 +; RV32I-NEXT: slli a0, a1, 16 ; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a2, a1 ; RV32I-NEXT: blt s2, a0, .LBB21_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB21_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB21_1 ; RV32I-NEXT: .LBB21_4: # %atomicrmw.end -; RV32I-NEXT: slli a0, a3, 16 +; RV32I-NEXT: slli a0, a1, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1710,34 +1710,34 @@ define signext i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: slli a0, s0, 48 ; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB21_2 ; RV64I-NEXT: .LBB21_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB21_2 Depth=1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: sh a1, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) +; RV64I-NEXT: lh a1, 14(sp) ; RV64I-NEXT: bnez a0, .LBB21_4 ; RV64I-NEXT: .LBB21_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 +; RV64I-NEXT: slli a0, a1, 48 ; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a2, a1 ; RV64I-NEXT: blt s2, a0, .LBB21_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB21_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB21_1 ; RV64I-NEXT: .LBB21_4: # %atomicrmw.end -; RV64I-NEXT: slli a0, a3, 48 +; RV64I-NEXT: slli a0, a1, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -1790,34 +1790,34 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 -; RV32I-NEXT: slli a0, a1, 16 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: slli a0, s0, 16 ; RV32I-NEXT: srai s2, a0, 16 ; RV32I-NEXT: j .LBB22_2 ; RV32I-NEXT: .LBB22_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB22_2 Depth=1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: sh a1, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) +; RV32I-NEXT: lh a1, 
14(sp) ; RV32I-NEXT: bnez a0, .LBB22_4 ; RV32I-NEXT: .LBB22_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: slli a0, a3, 16 +; RV32I-NEXT: slli a0, a1, 16 ; RV32I-NEXT: srai a0, a0, 16 -; RV32I-NEXT: mv a2, a3 +; RV32I-NEXT: mv a2, a1 ; RV32I-NEXT: bge s2, a0, .LBB22_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB22_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB22_1 ; RV32I-NEXT: .LBB22_4: # %atomicrmw.end -; RV32I-NEXT: slli a0, a3, 16 +; RV32I-NEXT: slli a0, a1, 16 ; RV32I-NEXT: srai a0, a0, 16 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload @@ -1866,34 +1866,34 @@ define signext i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 -; RV64I-NEXT: slli a0, a1, 48 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: slli a0, s0, 48 ; RV64I-NEXT: srai s2, a0, 48 ; RV64I-NEXT: j .LBB22_2 ; RV64I-NEXT: .LBB22_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB22_2 Depth=1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: sh a1, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) +; RV64I-NEXT: lh a1, 14(sp) ; RV64I-NEXT: bnez a0, .LBB22_4 ; RV64I-NEXT: .LBB22_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: slli a0, a3, 48 +; RV64I-NEXT: slli a0, a1, 48 ; RV64I-NEXT: srai a0, a0, 48 -; RV64I-NEXT: mv a2, a3 +; RV64I-NEXT: mv a2, a1 ; RV64I-NEXT: bge s2, a0, .LBB22_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB22_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB22_1 ; RV64I-NEXT: .LBB22_4: # %atomicrmw.end -; RV64I-NEXT: slli a0, a3, 48 +; RV64I-NEXT: slli a0, a1, 48 ; RV64I-NEXT: srai a0, a0, 48 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -2530,30 +2530,30 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB32_2 ; RV32I-NEXT: .LBB32_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB32_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB32_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB32_4 ; RV32I-NEXT: .LBB32_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: blt s1, a3, .LBB32_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt s0, a0, .LBB32_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB32_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 
; RV32I-NEXT: j .LBB32_1 ; RV32I-NEXT: .LBB32_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -2572,31 +2572,31 @@ define signext i32 @atomicrmw_max_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB32_2 ; RV64I-NEXT: .LBB32_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB32_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB32_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB32_4 ; RV64I-NEXT: .LBB32_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s2, a3, .LBB32_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s2, a0, .LBB32_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB32_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB32_1 ; RV64I-NEXT: .LBB32_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -2619,30 +2619,30 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB33_2 ; RV32I-NEXT: .LBB33_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB33_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB33_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB33_4 ; RV32I-NEXT: .LBB33_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bge s1, a3, .LBB33_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bge s0, a0, .LBB33_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB33_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB33_1 ; RV32I-NEXT: .LBB33_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -2661,31 +2661,31 @@ define signext i32 @atomicrmw_min_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) 
-; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB33_2 ; RV64I-NEXT: .LBB33_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB33_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB33_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB33_4 ; RV64I-NEXT: .LBB33_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s2, a3, .LBB33_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s2, a0, .LBB33_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB33_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB33_1 ; RV64I-NEXT: .LBB33_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -2708,30 +2708,30 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB34_2 ; RV32I-NEXT: .LBB34_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB34_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB34_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB34_4 ; RV32I-NEXT: .LBB34_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bltu s1, a3, .LBB34_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu s0, a0, .LBB34_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB34_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB34_1 ; RV32I-NEXT: .LBB34_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -2750,31 +2750,31 @@ define signext i32 @atomicrmw_umax_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB34_2 ; RV64I-NEXT: .LBB34_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB34_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB34_4 
+; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB34_4 ; RV64I-NEXT: .LBB34_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s2, a3, .LBB34_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s2, a0, .LBB34_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB34_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB34_1 ; RV64I-NEXT: .LBB34_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -2797,30 +2797,30 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB35_2 ; RV32I-NEXT: .LBB35_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB35_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB35_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB35_4 ; RV32I-NEXT: .LBB35_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a3 -; RV32I-NEXT: bgeu s1, a3, .LBB35_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgeu s0, a0, .LBB35_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB35_2 Depth=1 -; RV32I-NEXT: mv a2, s1 +; RV32I-NEXT: mv a2, s0 ; RV32I-NEXT: j .LBB35_1 ; RV32I-NEXT: .LBB35_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -2839,31 +2839,31 @@ define signext i32 @atomicrmw_umin_i32_monotonic(ptr %a, i32 %b) nounwind { ; RV64I-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB35_2 ; RV64I-NEXT: .LBB35_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB35_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB35_4 ; RV64I-NEXT: .LBB35_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s2, a3, .LBB35_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s2, a0, .LBB35_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB35_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB35_1 ; RV64I-NEXT: .LBB35_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; 
RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -3183,45 +3183,44 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB43_2 ; RV32I-NEXT: .LBB43_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB43_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB43_7 ; RV32I-NEXT: .LBB43_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB43_4 +; RV32I-NEXT: beq a1, s0, .LBB43_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a4, s0, a1 ; RV32I-NEXT: j .LBB43_5 ; RV32I-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB43_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: bnez a0, .LBB43_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a4, .LBB43_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB43_1 ; RV32I-NEXT: .LBB43_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3237,45 +3236,44 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB43_2 ; RV32IA-NEXT: .LBB43_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB43_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB43_7 ; RV32IA-NEXT: .LBB43_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB43_4 +; 
RV32IA-NEXT: beq a1, s0, .LBB43_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a4, s0, a1 ; RV32IA-NEXT: j .LBB43_5 ; RV32IA-NEXT: .LBB43_4: # in Loop: Header=BB43_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB43_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: bnez a0, .LBB43_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a4, .LBB43_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB43_1 ; RV32IA-NEXT: .LBB43_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3289,30 +3287,30 @@ define signext i64 @atomicrmw_max_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB43_2 ; RV64I-NEXT: .LBB43_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB43_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB43_4 ; RV64I-NEXT: .LBB43_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: blt s1, a3, .LBB43_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt s0, a0, .LBB43_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB43_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB43_1 ; RV64I-NEXT: .LBB43_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3336,45 +3334,44 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB44_2 ; RV32I-NEXT: .LBB44_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB44_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: 
bnez a2, .LBB44_7 ; RV32I-NEXT: .LBB44_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB44_4 +; RV32I-NEXT: beq a1, s0, .LBB44_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: slt a0, s0, a5 +; RV32I-NEXT: slt a4, s0, a1 ; RV32I-NEXT: j .LBB44_5 ; RV32I-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB44_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a0, .LBB44_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: beqz a4, .LBB44_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB44_1 ; RV32I-NEXT: .LBB44_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3390,45 +3387,44 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB44_2 ; RV32IA-NEXT: .LBB44_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB44_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB44_7 ; RV32IA-NEXT: .LBB44_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB44_4 +; RV32IA-NEXT: beq a1, s0, .LBB44_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32IA-NEXT: slt a0, s0, a5 +; RV32IA-NEXT: slt a4, s0, a1 ; RV32IA-NEXT: j .LBB44_5 ; RV32IA-NEXT: .LBB44_4: # in Loop: Header=BB44_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB44_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: beqz a0, .LBB44_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: beqz a4, .LBB44_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB44_1 ; RV32IA-NEXT: .LBB44_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3442,30 +3438,30 @@ define signext i64 @atomicrmw_min_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte 
Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB44_2 ; RV64I-NEXT: .LBB44_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB44_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB44_4 ; RV64I-NEXT: .LBB44_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bge s1, a3, .LBB44_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bge s0, a0, .LBB44_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB44_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB44_1 ; RV64I-NEXT: .LBB44_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3489,45 +3485,44 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB45_2 ; RV32I-NEXT: .LBB45_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB45_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB45_7 ; RV32I-NEXT: .LBB45_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB45_4 +; RV32I-NEXT: beq a1, s0, .LBB45_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a4, s0, a1 ; RV32I-NEXT: j .LBB45_5 ; RV32I-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB45_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: bnez a0, .LBB45_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: bnez a4, .LBB45_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB45_1 ; RV32I-NEXT: .LBB45_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3543,45 
+3538,44 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB45_2 ; RV32IA-NEXT: .LBB45_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB45_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB45_7 ; RV32IA-NEXT: .LBB45_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a5, s0, .LBB45_4 +; RV32IA-NEXT: beq a1, s0, .LBB45_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a4, s0, a1 ; RV32IA-NEXT: j .LBB45_5 ; RV32IA-NEXT: .LBB45_4: # in Loop: Header=BB45_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB45_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: bnez a0, .LBB45_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: bnez a4, .LBB45_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB45_1 ; RV32IA-NEXT: .LBB45_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3595,30 +3589,30 @@ define signext i64 @atomicrmw_umax_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB45_2 ; RV64I-NEXT: .LBB45_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB45_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB45_4 ; RV64I-NEXT: .LBB45_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bltu s1, a3, .LBB45_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu s0, a0, .LBB45_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB45_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB45_1 ; RV64I-NEXT: .LBB45_4: # %atomicrmw.end 
-; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3642,45 +3636,44 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB46_2 ; RV32I-NEXT: .LBB46_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: li a5, 0 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB46_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB46_7 ; RV32I-NEXT: .LBB46_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a5, s0, .LBB46_4 +; RV32I-NEXT: beq a1, s0, .LBB46_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a5 +; RV32I-NEXT: sltu a4, s0, a1 ; RV32I-NEXT: j .LBB46_5 ; RV32I-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a4 +; RV32I-NEXT: sltu a4, s1, a0 ; RV32I-NEXT: .LBB46_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: mv a2, a4 -; RV32I-NEXT: mv a3, a5 -; RV32I-NEXT: beqz a0, .LBB46_1 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: mv a3, a1 +; RV32I-NEXT: beqz a4, .LBB46_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 ; RV32I-NEXT: j .LBB46_1 ; RV32I-NEXT: .LBB46_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3696,45 +3689,44 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV32IA-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32IA-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB46_2 ; RV32IA-NEXT: .LBB46_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: li a4, 0 ; RV32IA-NEXT: li a5, 0 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB46_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB46_7 ; RV32IA-NEXT: .LBB46_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq 
a5, s0, .LBB46_4 +; RV32IA-NEXT: beq a1, s0, .LBB46_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a5 +; RV32IA-NEXT: sltu a4, s0, a1 ; RV32IA-NEXT: j .LBB46_5 ; RV32IA-NEXT: .LBB46_4: # in Loop: Header=BB46_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a4 +; RV32IA-NEXT: sltu a4, s1, a0 ; RV32IA-NEXT: .LBB46_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32IA-NEXT: mv a2, a4 -; RV32IA-NEXT: mv a3, a5 -; RV32IA-NEXT: beqz a0, .LBB46_1 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: mv a3, a1 +; RV32IA-NEXT: beqz a4, .LBB46_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 ; RV32IA-NEXT: j .LBB46_1 ; RV32IA-NEXT: .LBB46_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -3748,30 +3740,30 @@ define signext i64 @atomicrmw_umin_i64_monotonic(ptr %a, i64 %b) nounwind { ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB46_2 ; RV64I-NEXT: .LBB46_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB46_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB46_4 ; RV64I-NEXT: .LBB46_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a3 -; RV64I-NEXT: bgeu s1, a3, .LBB46_1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bgeu s0, a0, .LBB46_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB46_2 Depth=1 -; RV64I-NEXT: mv a2, s1 +; RV64I-NEXT: mv a2, s0 ; RV64I-NEXT: j .LBB46_1 ; RV64I-NEXT: .LBB46_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -4298,10 +4290,10 @@ define signext i32 @atomicrmw_xchg_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: amoswap.w a0, a1, (a0) ; RV32IA-NEXT: ret ; RV32IA-NEXT: .LBB53_2: # %else -; RV32IA-NEXT: mv a1, a0 -; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 0(a0) ; RV32IA-NEXT: li a2, 1 -; RV32IA-NEXT: sw a2, 0(a1) +; RV32IA-NEXT: sw a2, 0(a0) +; RV32IA-NEXT: mv a0, a1 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xchg_i32_monotonic_crossbb: @@ -4334,10 +4326,10 @@ define signext i32 @atomicrmw_xchg_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: amoswap.w a0, a1, (a0) ; RV64IA-NEXT: ret ; RV64IA-NEXT: .LBB53_2: # %else -; RV64IA-NEXT: mv a1, a0 -; RV64IA-NEXT: lw a0, 0(a0) +; RV64IA-NEXT: lw a1, 0(a0) ; RV64IA-NEXT: li a2, 1 -; RV64IA-NEXT: sw a2, 0(a1) +; RV64IA-NEXT: sw a2, 0(a0) +; RV64IA-NEXT: mv a0, a1 ; RV64IA-NEXT: ret br i1 %c, label %then, label %else @@ -4385,10 +4377,10 @@ define signext i32 @atomicrmw_add_i32_monotonic_crossbb(ptr %a, i1 %c) 
nounwind ; RV32IA-NEXT: amoadd.w a0, a1, (a0) ; RV32IA-NEXT: ret ; RV32IA-NEXT: .LBB54_2: # %else -; RV32IA-NEXT: mv a1, a0 -; RV32IA-NEXT: lw a0, 0(a0) -; RV32IA-NEXT: addi a2, a0, 1 -; RV32IA-NEXT: sw a2, 0(a1) +; RV32IA-NEXT: lw a1, 0(a0) +; RV32IA-NEXT: addi a2, a1, 1 +; RV32IA-NEXT: sw a2, 0(a0) +; RV32IA-NEXT: mv a0, a1 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_add_i32_monotonic_crossbb: @@ -4421,10 +4413,10 @@ define signext i32 @atomicrmw_add_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: amoadd.w a0, a1, (a0) ; RV64IA-NEXT: ret ; RV64IA-NEXT: .LBB54_2: # %else -; RV64IA-NEXT: mv a1, a0 -; RV64IA-NEXT: lw a0, 0(a0) -; RV64IA-NEXT: addi a2, a0, 1 -; RV64IA-NEXT: sw a2, 0(a1) +; RV64IA-NEXT: lw a1, 0(a0) +; RV64IA-NEXT: addi a2, a1, 1 +; RV64IA-NEXT: sw a2, 0(a0) +; RV64IA-NEXT: mv a0, a1 ; RV64IA-NEXT: ret br i1 %c, label %then, label %else @@ -4473,10 +4465,10 @@ define signext i32 @atomicrmw_sub_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: amoadd.w a0, a1, (a0) ; RV32IA-NEXT: ret ; RV32IA-NEXT: .LBB55_2: # %else -; RV32IA-NEXT: mv a1, a0 -; RV32IA-NEXT: lw a0, 0(a0) -; RV32IA-NEXT: addi a2, a0, -1 -; RV32IA-NEXT: sw a2, 0(a1) +; RV32IA-NEXT: lw a1, 0(a0) +; RV32IA-NEXT: addi a2, a1, -1 +; RV32IA-NEXT: sw a2, 0(a0) +; RV32IA-NEXT: mv a0, a1 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_sub_i32_monotonic_crossbb: @@ -4509,10 +4501,10 @@ define signext i32 @atomicrmw_sub_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: amoadd.w a0, a1, (a0) ; RV64IA-NEXT: ret ; RV64IA-NEXT: .LBB55_2: # %else -; RV64IA-NEXT: mv a1, a0 -; RV64IA-NEXT: lw a0, 0(a0) -; RV64IA-NEXT: addi a2, a0, -1 -; RV64IA-NEXT: sw a2, 0(a1) +; RV64IA-NEXT: lw a1, 0(a0) +; RV64IA-NEXT: addi a2, a1, -1 +; RV64IA-NEXT: sw a2, 0(a0) +; RV64IA-NEXT: mv a0, a1 ; RV64IA-NEXT: ret br i1 %c, label %then, label %else @@ -4561,10 +4553,10 @@ define signext i32 @atomicrmw_and_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: amoand.w a0, a1, (a0) ; RV32IA-NEXT: ret ; RV32IA-NEXT: .LBB56_2: # %else -; RV32IA-NEXT: mv a1, a0 -; RV32IA-NEXT: lw a0, 0(a0) -; RV32IA-NEXT: andi a2, a0, 1 -; RV32IA-NEXT: sw a2, 0(a1) +; RV32IA-NEXT: lw a1, 0(a0) +; RV32IA-NEXT: andi a2, a1, 1 +; RV32IA-NEXT: sw a2, 0(a0) +; RV32IA-NEXT: mv a0, a1 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_and_i32_monotonic_crossbb: @@ -4597,10 +4589,10 @@ define signext i32 @atomicrmw_and_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: amoand.w a0, a1, (a0) ; RV64IA-NEXT: ret ; RV64IA-NEXT: .LBB56_2: # %else -; RV64IA-NEXT: mv a1, a0 -; RV64IA-NEXT: lw a0, 0(a0) -; RV64IA-NEXT: andi a2, a0, 1 -; RV64IA-NEXT: sw a2, 0(a1) +; RV64IA-NEXT: lw a1, 0(a0) +; RV64IA-NEXT: andi a2, a1, 1 +; RV64IA-NEXT: sw a2, 0(a0) +; RV64IA-NEXT: mv a0, a1 ; RV64IA-NEXT: ret br i1 %c, label %then, label %else @@ -4642,24 +4634,25 @@ define signext i32 @atomicrmw_nand_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; ; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic_crossbb: ; RV32IA-NOZACAS: # %bb.0: -; RV32IA-NOZACAS-NEXT: andi a2, a1, 1 -; RV32IA-NOZACAS-NEXT: mv a1, a0 -; RV32IA-NOZACAS-NEXT: beqz a2, .LBB57_2 +; RV32IA-NOZACAS-NEXT: andi a1, a1, 1 +; RV32IA-NOZACAS-NEXT: beqz a1, .LBB57_2 ; RV32IA-NOZACAS-NEXT: # %bb.1: # %then ; RV32IA-NOZACAS-NEXT: li a2, 1 ; RV32IA-NOZACAS-NEXT: .LBB57_3: # %then ; RV32IA-NOZACAS-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NOZACAS-NEXT: lr.w a0, (a1) -; RV32IA-NOZACAS-NEXT: and a3, a0, a2 +; RV32IA-NOZACAS-NEXT: lr.w a1, (a0) +; RV32IA-NOZACAS-NEXT: and a3, a1, a2 ; 
RV32IA-NOZACAS-NEXT: not a3, a3 -; RV32IA-NOZACAS-NEXT: sc.w a3, a3, (a1) +; RV32IA-NOZACAS-NEXT: sc.w a3, a3, (a0) ; RV32IA-NOZACAS-NEXT: bnez a3, .LBB57_3 ; RV32IA-NOZACAS-NEXT: # %bb.4: # %then +; RV32IA-NOZACAS-NEXT: mv a0, a1 ; RV32IA-NOZACAS-NEXT: ret ; RV32IA-NOZACAS-NEXT: .LBB57_2: # %else -; RV32IA-NOZACAS-NEXT: lw a0, 0(a1) -; RV32IA-NOZACAS-NEXT: andi a2, a0, 1 -; RV32IA-NOZACAS-NEXT: sw a2, 0(a1) +; RV32IA-NOZACAS-NEXT: lw a1, 0(a0) +; RV32IA-NOZACAS-NEXT: andi a2, a1, 1 +; RV32IA-NOZACAS-NEXT: sw a2, 0(a0) +; RV32IA-NOZACAS-NEXT: mv a0, a1 ; RV32IA-NOZACAS-NEXT: ret ; ; RV32IA-ZACAS-LABEL: atomicrmw_nand_i32_monotonic_crossbb: @@ -4708,24 +4701,25 @@ define signext i32 @atomicrmw_nand_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; ; RV64IA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic_crossbb: ; RV64IA-NOZACAS: # %bb.0: -; RV64IA-NOZACAS-NEXT: andi a2, a1, 1 -; RV64IA-NOZACAS-NEXT: mv a1, a0 -; RV64IA-NOZACAS-NEXT: beqz a2, .LBB57_2 +; RV64IA-NOZACAS-NEXT: andi a1, a1, 1 +; RV64IA-NOZACAS-NEXT: beqz a1, .LBB57_2 ; RV64IA-NOZACAS-NEXT: # %bb.1: # %then ; RV64IA-NOZACAS-NEXT: li a2, 1 ; RV64IA-NOZACAS-NEXT: .LBB57_3: # %then ; RV64IA-NOZACAS-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64IA-NOZACAS-NEXT: lr.w a0, (a1) -; RV64IA-NOZACAS-NEXT: and a3, a0, a2 +; RV64IA-NOZACAS-NEXT: lr.w a1, (a0) +; RV64IA-NOZACAS-NEXT: and a3, a1, a2 ; RV64IA-NOZACAS-NEXT: not a3, a3 -; RV64IA-NOZACAS-NEXT: sc.w a3, a3, (a1) +; RV64IA-NOZACAS-NEXT: sc.w a3, a3, (a0) ; RV64IA-NOZACAS-NEXT: bnez a3, .LBB57_3 ; RV64IA-NOZACAS-NEXT: # %bb.4: # %then +; RV64IA-NOZACAS-NEXT: mv a0, a1 ; RV64IA-NOZACAS-NEXT: ret ; RV64IA-NOZACAS-NEXT: .LBB57_2: # %else -; RV64IA-NOZACAS-NEXT: lw a0, 0(a1) -; RV64IA-NOZACAS-NEXT: andi a2, a0, 1 -; RV64IA-NOZACAS-NEXT: sw a2, 0(a1) +; RV64IA-NOZACAS-NEXT: lw a1, 0(a0) +; RV64IA-NOZACAS-NEXT: andi a2, a1, 1 +; RV64IA-NOZACAS-NEXT: sw a2, 0(a0) +; RV64IA-NOZACAS-NEXT: mv a0, a1 ; RV64IA-NOZACAS-NEXT: ret ; ; RV64IA-ZACAS-LABEL: atomicrmw_nand_i32_monotonic_crossbb: @@ -4797,10 +4791,10 @@ define signext i32 @atomicrmw_or_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind { ; RV32IA-NEXT: amoor.w a0, a1, (a0) ; RV32IA-NEXT: ret ; RV32IA-NEXT: .LBB58_2: # %else -; RV32IA-NEXT: mv a1, a0 -; RV32IA-NEXT: lw a0, 0(a0) -; RV32IA-NEXT: ori a2, a0, 1 -; RV32IA-NEXT: sw a2, 0(a1) +; RV32IA-NEXT: lw a1, 0(a0) +; RV32IA-NEXT: ori a2, a1, 1 +; RV32IA-NEXT: sw a2, 0(a0) +; RV32IA-NEXT: mv a0, a1 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_or_i32_monotonic_crossbb: @@ -4833,10 +4827,10 @@ define signext i32 @atomicrmw_or_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind { ; RV64IA-NEXT: amoor.w a0, a1, (a0) ; RV64IA-NEXT: ret ; RV64IA-NEXT: .LBB58_2: # %else -; RV64IA-NEXT: mv a1, a0 -; RV64IA-NEXT: lw a0, 0(a0) -; RV64IA-NEXT: ori a2, a0, 1 -; RV64IA-NEXT: sw a2, 0(a1) +; RV64IA-NEXT: lw a1, 0(a0) +; RV64IA-NEXT: ori a2, a1, 1 +; RV64IA-NEXT: sw a2, 0(a0) +; RV64IA-NEXT: mv a0, a1 ; RV64IA-NEXT: ret br i1 %c, label %then, label %else @@ -4885,10 +4879,10 @@ define signext i32 @atomicrmw_xor_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: amoxor.w a0, a1, (a0) ; RV32IA-NEXT: ret ; RV32IA-NEXT: .LBB59_2: # %else -; RV32IA-NEXT: mv a1, a0 -; RV32IA-NEXT: lw a0, 0(a0) -; RV32IA-NEXT: xori a2, a0, 1 -; RV32IA-NEXT: sw a2, 0(a1) +; RV32IA-NEXT: lw a1, 0(a0) +; RV32IA-NEXT: xori a2, a1, 1 +; RV32IA-NEXT: sw a2, 0(a0) +; RV32IA-NEXT: mv a0, a1 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_xor_i32_monotonic_crossbb: @@ -4921,10 +4915,10 @@ define signext i32 
@atomicrmw_xor_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: amoxor.w a0, a1, (a0) ; RV64IA-NEXT: ret ; RV64IA-NEXT: .LBB59_2: # %else -; RV64IA-NEXT: mv a1, a0 -; RV64IA-NEXT: lw a0, 0(a0) -; RV64IA-NEXT: xori a2, a0, 1 -; RV64IA-NEXT: sw a2, 0(a1) +; RV64IA-NEXT: lw a1, 0(a0) +; RV64IA-NEXT: xori a2, a1, 1 +; RV64IA-NEXT: sw a2, 0(a0) +; RV64IA-NEXT: mv a0, a1 ; RV64IA-NEXT: ret br i1 %c, label %then, label %else @@ -4949,40 +4943,40 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: andi a1, a1, 1 ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: andi a1, a1, 1 ; RV32I-NEXT: beqz a1, .LBB60_5 ; RV32I-NEXT: # %bb.1: # %then -; RV32I-NEXT: lw a1, 0(s0) +; RV32I-NEXT: lw a0, 0(s0) ; RV32I-NEXT: j .LBB60_3 ; RV32I-NEXT: .LBB60_2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB60_3 Depth=1 -; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: sw a0, 4(sp) ; RV32I-NEXT: addi a1, sp, 4 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: bnez a0, .LBB60_8 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 4(sp) +; RV32I-NEXT: bnez a1, .LBB60_8 ; RV32I-NEXT: .LBB60_3: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bgtz a1, .LBB60_2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bgtz a0, .LBB60_2 ; RV32I-NEXT: # %bb.4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB60_3 Depth=1 ; RV32I-NEXT: li a2, 1 ; RV32I-NEXT: j .LBB60_2 ; RV32I-NEXT: .LBB60_5: # %else -; RV32I-NEXT: lw a1, 0(s0) -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: bgtz a1, .LBB60_7 +; RV32I-NEXT: lw a0, 0(s0) +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: bgtz a0, .LBB60_7 ; RV32I-NEXT: # %bb.6: # %else -; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: li a1, 1 ; RV32I-NEXT: .LBB60_7: # %else -; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 0(s0) ; RV32I-NEXT: .LBB60_8: # %merge -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -4990,21 +4984,21 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; ; RV32IA-LABEL: atomicrmw_max_i32_monotonic_crossbb: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a1, 1 -; RV32IA-NEXT: mv a1, a0 -; RV32IA-NEXT: beqz a2, .LBB60_2 +; RV32IA-NEXT: andi a1, a1, 1 +; RV32IA-NEXT: beqz a1, .LBB60_2 ; RV32IA-NEXT: # %bb.1: # %then -; RV32IA-NEXT: li a0, 1 -; RV32IA-NEXT: amomax.w a0, a0, (a1) +; RV32IA-NEXT: li a1, 1 +; RV32IA-NEXT: amomax.w a0, a1, (a0) ; RV32IA-NEXT: ret ; RV32IA-NEXT: .LBB60_2: # %else -; RV32IA-NEXT: lw a0, 0(a1) -; RV32IA-NEXT: mv a2, a0 -; RV32IA-NEXT: bgtz a0, .LBB60_4 +; RV32IA-NEXT: lw a1, 0(a0) +; RV32IA-NEXT: mv a2, a1 +; RV32IA-NEXT: bgtz a1, .LBB60_4 ; RV32IA-NEXT: # %bb.3: # %else ; RV32IA-NEXT: li a2, 1 ; RV32IA-NEXT: .LBB60_4: # %else -; RV32IA-NEXT: sw a2, 0(a1) +; RV32IA-NEXT: sw a2, 0(a0) +; RV32IA-NEXT: mv a0, a1 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_max_i32_monotonic_crossbb: @@ -5012,41 +5006,41 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64I-NEXT: addi sp, sp, -32 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: andi a1, a1, 1 ; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: andi a1, a1, 1 ; RV64I-NEXT: beqz a1, 
.LBB60_5 ; RV64I-NEXT: # %bb.1: # %then -; RV64I-NEXT: lw a1, 0(s0) +; RV64I-NEXT: lw a0, 0(s0) ; RV64I-NEXT: j .LBB60_3 ; RV64I-NEXT: .LBB60_2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB60_3 Depth=1 -; RV64I-NEXT: sw a1, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a1, 12(sp) -; RV64I-NEXT: bnez a0, .LBB60_8 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB60_8 ; RV64I-NEXT: .LBB60_3: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: blt a0, a1, .LBB60_2 +; RV64I-NEXT: li a1, 1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt a1, a0, .LBB60_2 ; RV64I-NEXT: # %bb.4: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB60_3 Depth=1 ; RV64I-NEXT: li a2, 1 ; RV64I-NEXT: j .LBB60_2 ; RV64I-NEXT: .LBB60_5: # %else -; RV64I-NEXT: lw a1, 0(s0) -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: bgtz a1, .LBB60_7 +; RV64I-NEXT: lw a0, 0(s0) +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: bgtz a0, .LBB60_7 ; RV64I-NEXT: # %bb.6: # %else -; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: li a1, 1 ; RV64I-NEXT: .LBB60_7: # %else -; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: sw a1, 0(s0) ; RV64I-NEXT: .LBB60_8: # %merge -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 @@ -5054,21 +5048,21 @@ define signext i32 @atomicrmw_max_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; ; RV64IA-LABEL: atomicrmw_max_i32_monotonic_crossbb: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a1, 1 -; RV64IA-NEXT: mv a1, a0 -; RV64IA-NEXT: beqz a2, .LBB60_2 +; RV64IA-NEXT: andi a1, a1, 1 +; RV64IA-NEXT: beqz a1, .LBB60_2 ; RV64IA-NEXT: # %bb.1: # %then -; RV64IA-NEXT: li a0, 1 -; RV64IA-NEXT: amomax.w a0, a0, (a1) +; RV64IA-NEXT: li a1, 1 +; RV64IA-NEXT: amomax.w a0, a1, (a0) ; RV64IA-NEXT: ret ; RV64IA-NEXT: .LBB60_2: # %else -; RV64IA-NEXT: lw a0, 0(a1) -; RV64IA-NEXT: mv a2, a0 -; RV64IA-NEXT: bgtz a0, .LBB60_4 +; RV64IA-NEXT: lw a1, 0(a0) +; RV64IA-NEXT: mv a2, a1 +; RV64IA-NEXT: bgtz a1, .LBB60_4 ; RV64IA-NEXT: # %bb.3: # %else ; RV64IA-NEXT: li a2, 1 ; RV64IA-NEXT: .LBB60_4: # %else -; RV64IA-NEXT: sw a2, 0(a1) +; RV64IA-NEXT: sw a2, 0(a0) +; RV64IA-NEXT: mv a0, a1 ; RV64IA-NEXT: ret br i1 %c, label %then, label %else @@ -5095,41 +5089,41 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: andi a1, a1, 1 ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: andi a1, a1, 1 ; RV32I-NEXT: beqz a1, .LBB61_5 ; RV32I-NEXT: # %bb.1: # %then -; RV32I-NEXT: lw a1, 0(s0) +; RV32I-NEXT: lw a0, 0(s0) ; RV32I-NEXT: li s1, 2 ; RV32I-NEXT: j .LBB61_3 ; RV32I-NEXT: .LBB61_2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB61_3 Depth=1 -; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a1, 0(sp) -; RV32I-NEXT: bnez a0, .LBB61_8 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB61_8 ; RV32I-NEXT: .LBB61_3: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: blt a1, 
s1, .LBB61_2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: blt a0, s1, .LBB61_2 ; RV32I-NEXT: # %bb.4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB61_3 Depth=1 ; RV32I-NEXT: li a2, 1 ; RV32I-NEXT: j .LBB61_2 ; RV32I-NEXT: .LBB61_5: # %else -; RV32I-NEXT: lw a1, 0(s0) -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: blez a1, .LBB61_7 +; RV32I-NEXT: lw a0, 0(s0) +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: blez a0, .LBB61_7 ; RV32I-NEXT: # %bb.6: # %else -; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: li a1, 1 ; RV32I-NEXT: .LBB61_7: # %else -; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 0(s0) ; RV32I-NEXT: .LBB61_8: # %merge -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -5138,21 +5132,21 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; ; RV32IA-LABEL: atomicrmw_min_i32_monotonic_crossbb: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a1, 1 -; RV32IA-NEXT: mv a1, a0 -; RV32IA-NEXT: beqz a2, .LBB61_2 +; RV32IA-NEXT: andi a1, a1, 1 +; RV32IA-NEXT: beqz a1, .LBB61_2 ; RV32IA-NEXT: # %bb.1: # %then -; RV32IA-NEXT: li a0, 1 -; RV32IA-NEXT: amomin.w a0, a0, (a1) +; RV32IA-NEXT: li a1, 1 +; RV32IA-NEXT: amomin.w a0, a1, (a0) ; RV32IA-NEXT: ret ; RV32IA-NEXT: .LBB61_2: # %else -; RV32IA-NEXT: lw a0, 0(a1) -; RV32IA-NEXT: mv a2, a0 -; RV32IA-NEXT: blez a0, .LBB61_4 +; RV32IA-NEXT: lw a1, 0(a0) +; RV32IA-NEXT: mv a2, a1 +; RV32IA-NEXT: blez a1, .LBB61_4 ; RV32IA-NEXT: # %bb.3: # %else ; RV32IA-NEXT: li a2, 1 ; RV32IA-NEXT: .LBB61_4: # %else -; RV32IA-NEXT: sw a2, 0(a1) +; RV32IA-NEXT: sw a2, 0(a0) +; RV32IA-NEXT: mv a0, a1 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_min_i32_monotonic_crossbb: @@ -5161,41 +5155,41 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: andi a1, a1, 1 ; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: andi a1, a1, 1 ; RV64I-NEXT: beqz a1, .LBB61_5 ; RV64I-NEXT: # %bb.1: # %then -; RV64I-NEXT: lw a1, 0(s0) +; RV64I-NEXT: lw a0, 0(s0) ; RV64I-NEXT: li s1, 2 ; RV64I-NEXT: j .LBB61_3 ; RV64I-NEXT: .LBB61_2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB61_3 Depth=1 -; RV64I-NEXT: sw a1, 4(sp) +; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: addi a1, sp, 4 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a1, 4(sp) -; RV64I-NEXT: bnez a0, .LBB61_8 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 4(sp) +; RV64I-NEXT: bnez a1, .LBB61_8 ; RV64I-NEXT: .LBB61_3: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: blt a1, s1, .LBB61_2 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: blt a0, s1, .LBB61_2 ; RV64I-NEXT: # %bb.4: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB61_3 Depth=1 ; RV64I-NEXT: li a2, 1 ; RV64I-NEXT: j .LBB61_2 ; RV64I-NEXT: .LBB61_5: # %else -; RV64I-NEXT: lw a1, 0(s0) -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: blez a1, .LBB61_7 +; RV64I-NEXT: lw a0, 0(s0) +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: blez a0, .LBB61_7 ; RV64I-NEXT: # %bb.6: # %else -; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: li a1, 1 ; RV64I-NEXT: .LBB61_7: # %else -; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: sw a1, 0(s0) ; RV64I-NEXT: .LBB61_8: # %merge -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 
16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -5204,21 +5198,21 @@ define signext i32 @atomicrmw_min_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; ; RV64IA-LABEL: atomicrmw_min_i32_monotonic_crossbb: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a1, 1 -; RV64IA-NEXT: mv a1, a0 -; RV64IA-NEXT: beqz a2, .LBB61_2 +; RV64IA-NEXT: andi a1, a1, 1 +; RV64IA-NEXT: beqz a1, .LBB61_2 ; RV64IA-NEXT: # %bb.1: # %then -; RV64IA-NEXT: li a0, 1 -; RV64IA-NEXT: amomin.w a0, a0, (a1) +; RV64IA-NEXT: li a1, 1 +; RV64IA-NEXT: amomin.w a0, a1, (a0) ; RV64IA-NEXT: ret ; RV64IA-NEXT: .LBB61_2: # %else -; RV64IA-NEXT: lw a0, 0(a1) -; RV64IA-NEXT: mv a2, a0 -; RV64IA-NEXT: blez a0, .LBB61_4 +; RV64IA-NEXT: lw a1, 0(a0) +; RV64IA-NEXT: mv a2, a1 +; RV64IA-NEXT: blez a1, .LBB61_4 ; RV64IA-NEXT: # %bb.3: # %else ; RV64IA-NEXT: li a2, 1 ; RV64IA-NEXT: .LBB61_4: # %else -; RV64IA-NEXT: sw a2, 0(a1) +; RV64IA-NEXT: sw a2, 0(a0) +; RV64IA-NEXT: mv a0, a1 ; RV64IA-NEXT: ret br i1 %c, label %then, label %else @@ -5244,31 +5238,31 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: andi a1, a1, 1 ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: andi a1, a1, 1 ; RV32I-NEXT: beqz a1, .LBB62_3 ; RV32I-NEXT: # %bb.1: # %then -; RV32I-NEXT: lw a1, 0(s0) +; RV32I-NEXT: lw a0, 0(s0) ; RV32I-NEXT: .LBB62_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: seqz a2, a1 -; RV32I-NEXT: add a2, a1, a2 -; RV32I-NEXT: sw a1, 4(sp) +; RV32I-NEXT: seqz a2, a0 +; RV32I-NEXT: add a2, a0, a2 +; RV32I-NEXT: sw a0, 4(sp) ; RV32I-NEXT: addi a1, sp, 4 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a1, 4(sp) -; RV32I-NEXT: beqz a0, .LBB62_2 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 4(sp) +; RV32I-NEXT: beqz a1, .LBB62_2 ; RV32I-NEXT: j .LBB62_4 ; RV32I-NEXT: .LBB62_3: # %else -; RV32I-NEXT: lw a1, 0(s0) -; RV32I-NEXT: seqz a0, a1 -; RV32I-NEXT: add a0, a1, a0 -; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: lw a0, 0(s0) +; RV32I-NEXT: seqz a1, a0 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: sw a1, 0(s0) ; RV32I-NEXT: .LBB62_4: # %merge -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -5283,11 +5277,11 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32IA-NEXT: amomaxu.w a0, a1, (a0) ; RV32IA-NEXT: ret ; RV32IA-NEXT: .LBB62_2: # %else -; RV32IA-NEXT: mv a1, a0 -; RV32IA-NEXT: lw a0, 0(a0) -; RV32IA-NEXT: seqz a2, a0 -; RV32IA-NEXT: add a2, a0, a2 -; RV32IA-NEXT: sw a2, 0(a1) +; RV32IA-NEXT: lw a1, 0(a0) +; RV32IA-NEXT: seqz a2, a1 +; RV32IA-NEXT: add a2, a1, a2 +; RV32IA-NEXT: sw a2, 0(a0) +; RV32IA-NEXT: mv a0, a1 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umax_i32_monotonic_crossbb: @@ -5295,38 +5289,38 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64I-NEXT: addi sp, sp, -32 ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: andi a1, a1, 1 ; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: andi a1, a1, 1 ; RV64I-NEXT: beqz a1, .LBB62_5 ; RV64I-NEXT: # %bb.1: # %then -; RV64I-NEXT: lw a1, 0(s0) +; RV64I-NEXT: lw a0, 0(s0) ; RV64I-NEXT: j .LBB62_3 ; RV64I-NEXT: .LBB62_2: # 
%atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB62_3 Depth=1 -; RV64I-NEXT: sw a1, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a1, 12(sp) -; RV64I-NEXT: bnez a0, .LBB62_6 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB62_6 ; RV64I-NEXT: .LBB62_3: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: li a0, 1 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu a0, a1, .LBB62_2 +; RV64I-NEXT: li a1, 1 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu a1, a0, .LBB62_2 ; RV64I-NEXT: # %bb.4: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB62_3 Depth=1 ; RV64I-NEXT: li a2, 1 ; RV64I-NEXT: j .LBB62_2 ; RV64I-NEXT: .LBB62_5: # %else -; RV64I-NEXT: lw a1, 0(s0) -; RV64I-NEXT: seqz a0, a1 -; RV64I-NEXT: add a0, a1, a0 -; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: lw a0, 0(s0) +; RV64I-NEXT: seqz a1, a0 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: sw a1, 0(s0) ; RV64I-NEXT: .LBB62_6: # %merge -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 32 @@ -5341,11 +5335,11 @@ define signext i32 @atomicrmw_umax_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64IA-NEXT: amomaxu.w a0, a1, (a0) ; RV64IA-NEXT: ret ; RV64IA-NEXT: .LBB62_2: # %else -; RV64IA-NEXT: mv a1, a0 -; RV64IA-NEXT: lw a0, 0(a0) -; RV64IA-NEXT: seqz a2, a0 -; RV64IA-NEXT: add a2, a0, a2 -; RV64IA-NEXT: sw a2, 0(a1) +; RV64IA-NEXT: lw a1, 0(a0) +; RV64IA-NEXT: seqz a2, a1 +; RV64IA-NEXT: add a2, a1, a2 +; RV64IA-NEXT: sw a2, 0(a0) +; RV64IA-NEXT: mv a0, a1 ; RV64IA-NEXT: ret br i1 %c, label %then, label %else @@ -5372,42 +5366,42 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s1, 4(sp) # 4-byte Folded Spill -; RV32I-NEXT: andi a1, a1, 1 ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: andi a1, a1, 1 ; RV32I-NEXT: beqz a1, .LBB63_5 ; RV32I-NEXT: # %bb.1: # %then -; RV32I-NEXT: lw a1, 0(s0) +; RV32I-NEXT: lw a0, 0(s0) ; RV32I-NEXT: li s1, 2 ; RV32I-NEXT: j .LBB63_3 ; RV32I-NEXT: .LBB63_2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB63_3 Depth=1 -; RV32I-NEXT: sw a1, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: li a4, 0 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a1, 0(sp) -; RV32I-NEXT: bnez a0, .LBB63_8 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB63_8 ; RV32I-NEXT: .LBB63_3: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: bltu a1, s1, .LBB63_2 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: bltu a0, s1, .LBB63_2 ; RV32I-NEXT: # %bb.4: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB63_3 Depth=1 ; RV32I-NEXT: li a2, 1 ; RV32I-NEXT: j .LBB63_2 ; RV32I-NEXT: .LBB63_5: # %else -; RV32I-NEXT: lw a1, 0(s0) +; RV32I-NEXT: lw a0, 0(s0) ; RV32I-NEXT: li a2, 1 -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: bltu a1, a2, .LBB63_7 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: bltu a0, a2, .LBB63_7 ; RV32I-NEXT: # %bb.6: # %else -; RV32I-NEXT: li a0, 1 +; RV32I-NEXT: li a1, 1 ; RV32I-NEXT: .LBB63_7: # %else -; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 0(s0) ; RV32I-NEXT: .LBB63_8: # %merge -; RV32I-NEXT: mv a0, a1 
; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -5416,22 +5410,22 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; ; RV32IA-LABEL: atomicrmw_umin_i32_monotonic_crossbb: ; RV32IA: # %bb.0: -; RV32IA-NEXT: andi a2, a1, 1 -; RV32IA-NEXT: mv a1, a0 -; RV32IA-NEXT: beqz a2, .LBB63_2 +; RV32IA-NEXT: andi a1, a1, 1 +; RV32IA-NEXT: beqz a1, .LBB63_2 ; RV32IA-NEXT: # %bb.1: # %then -; RV32IA-NEXT: li a0, 1 -; RV32IA-NEXT: amominu.w a0, a0, (a1) +; RV32IA-NEXT: li a1, 1 +; RV32IA-NEXT: amominu.w a0, a1, (a0) ; RV32IA-NEXT: ret ; RV32IA-NEXT: .LBB63_2: # %else -; RV32IA-NEXT: lw a0, 0(a1) +; RV32IA-NEXT: lw a1, 0(a0) ; RV32IA-NEXT: li a3, 1 -; RV32IA-NEXT: mv a2, a0 -; RV32IA-NEXT: bltu a0, a3, .LBB63_4 +; RV32IA-NEXT: mv a2, a1 +; RV32IA-NEXT: bltu a1, a3, .LBB63_4 ; RV32IA-NEXT: # %bb.3: # %else ; RV32IA-NEXT: li a2, 1 ; RV32IA-NEXT: .LBB63_4: # %else -; RV32IA-NEXT: sw a2, 0(a1) +; RV32IA-NEXT: sw a2, 0(a0) +; RV32IA-NEXT: mv a0, a1 ; RV32IA-NEXT: ret ; ; RV64I-LABEL: atomicrmw_umin_i32_monotonic_crossbb: @@ -5440,42 +5434,42 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; RV64I-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s1, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: andi a1, a1, 1 ; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: andi a1, a1, 1 ; RV64I-NEXT: beqz a1, .LBB63_5 ; RV64I-NEXT: # %bb.1: # %then -; RV64I-NEXT: lw a1, 0(s0) +; RV64I-NEXT: lw a0, 0(s0) ; RV64I-NEXT: li s1, 2 ; RV64I-NEXT: j .LBB63_3 ; RV64I-NEXT: .LBB63_2: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB63_3 Depth=1 -; RV64I-NEXT: sw a1, 4(sp) +; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: addi a1, sp, 4 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: li a3, 0 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a1, 4(sp) -; RV64I-NEXT: bnez a0, .LBB63_8 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 4(sp) +; RV64I-NEXT: bnez a1, .LBB63_8 ; RV64I-NEXT: .LBB63_3: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: mv a2, a1 -; RV64I-NEXT: bltu a1, s1, .LBB63_2 +; RV64I-NEXT: mv a2, a0 +; RV64I-NEXT: bltu a0, s1, .LBB63_2 ; RV64I-NEXT: # %bb.4: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB63_3 Depth=1 ; RV64I-NEXT: li a2, 1 ; RV64I-NEXT: j .LBB63_2 ; RV64I-NEXT: .LBB63_5: # %else -; RV64I-NEXT: lw a1, 0(s0) +; RV64I-NEXT: lw a0, 0(s0) ; RV64I-NEXT: li a2, 1 -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: bltu a1, a2, .LBB63_7 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: bltu a0, a2, .LBB63_7 ; RV64I-NEXT: # %bb.6: # %else -; RV64I-NEXT: li a0, 1 +; RV64I-NEXT: li a1, 1 ; RV64I-NEXT: .LBB63_7: # %else -; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: sw a1, 0(s0) ; RV64I-NEXT: .LBB63_8: # %merge -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -5484,22 +5478,22 @@ define signext i32 @atomicrmw_umin_i32_monotonic_crossbb(ptr %a, i1 %c) nounwind ; ; RV64IA-LABEL: atomicrmw_umin_i32_monotonic_crossbb: ; RV64IA: # %bb.0: -; RV64IA-NEXT: andi a2, a1, 1 -; RV64IA-NEXT: mv a1, a0 -; RV64IA-NEXT: beqz a2, .LBB63_2 +; RV64IA-NEXT: andi a1, a1, 1 +; RV64IA-NEXT: beqz a1, .LBB63_2 ; RV64IA-NEXT: # %bb.1: # %then -; RV64IA-NEXT: li a0, 1 -; RV64IA-NEXT: amominu.w a0, a0, (a1) +; RV64IA-NEXT: li a1, 1 +; RV64IA-NEXT: amominu.w a0, 
a1, (a0) ; RV64IA-NEXT: ret ; RV64IA-NEXT: .LBB63_2: # %else -; RV64IA-NEXT: lw a0, 0(a1) +; RV64IA-NEXT: lw a1, 0(a0) ; RV64IA-NEXT: li a3, 1 -; RV64IA-NEXT: mv a2, a0 -; RV64IA-NEXT: bltu a0, a3, .LBB63_4 +; RV64IA-NEXT: mv a2, a1 +; RV64IA-NEXT: bltu a1, a3, .LBB63_4 ; RV64IA-NEXT: # %bb.3: # %else ; RV64IA-NEXT: li a2, 1 ; RV64IA-NEXT: .LBB63_4: # %else -; RV64IA-NEXT: sw a2, 0(a1) +; RV64IA-NEXT: sw a2, 0(a0) +; RV64IA-NEXT: mv a0, a1 ; RV64IA-NEXT: ret br i1 %c, label %then, label %else diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll index 34b29ea1dc6c2..82e64c9cb5f65 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-cond-sub-clamp.ll @@ -26,27 +26,27 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) { ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: .LBB0_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: sltu a0, a0, s2 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a0, a0, s1 -; RV32I-NEXT: sub a2, a3, a0 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: sltu a1, a1, s2 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a1, a1, s0 +; RV32I-NEXT: sub a2, a0, a1 +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: beqz a0, .LBB0_1 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: beqz a1, .LBB0_1 ; RV32I-NEXT: # %bb.2: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -65,9 +65,9 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) { ; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: li a4, 255 ; RV32IA-NEXT: andi a0, a3, 24 -; RV32IA-NEXT: lw a5, 0(a2) ; RV32IA-NEXT: sll a3, a4, a3 ; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: lw a5, 0(a2) ; RV32IA-NEXT: andi a4, a1, 255 ; RV32IA-NEXT: .LBB0_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Loop Header: Depth=1 @@ -109,27 +109,27 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) { ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, -24 ; RV64I-NEXT: .cfi_offset s2, -32 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: .LBB0_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: sltu a0, a0, s2 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: and a0, a0, s1 -; RV64I-NEXT: sub a2, a3, a0 -; RV64I-NEXT: sb a3, 15(sp) +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: sltu a1, a1, s2 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: and a1, a1, s0 +; RV64I-NEXT: sub a2, a0, a1 +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 
15(sp) -; RV64I-NEXT: beqz a0, .LBB0_1 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: beqz a1, .LBB0_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -145,18 +145,18 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) { ; RV64IA-LABEL: atomicrmw_usub_cond_i8: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: li a5, 255 -; RV64IA-NEXT: andi a0, a4, 24 -; RV64IA-NEXT: lw a3, 0(a2) -; RV64IA-NEXT: sllw a4, a5, a4 -; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: li a4, 255 +; RV64IA-NEXT: andi a0, a3, 24 +; RV64IA-NEXT: sllw a3, a4, a3 +; RV64IA-NEXT: not a3, a3 +; RV64IA-NEXT: lw a4, 0(a2) ; RV64IA-NEXT: andi a5, a1, 255 ; RV64IA-NEXT: .LBB0_1: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB0_3 Depth 2 -; RV64IA-NEXT: srlw a6, a3, a0 -; RV64IA-NEXT: sext.w a7, a3 +; RV64IA-NEXT: srlw a6, a4, a0 +; RV64IA-NEXT: sext.w a7, a4 ; RV64IA-NEXT: andi t0, a6, 255 ; RV64IA-NEXT: sltu t0, t0, a5 ; RV64IA-NEXT: addi t0, t0, -1 @@ -164,20 +164,20 @@ define i8 @atomicrmw_usub_cond_i8(ptr %ptr, i8 %val) { ; RV64IA-NEXT: subw a6, a6, t0 ; RV64IA-NEXT: andi a6, a6, 255 ; RV64IA-NEXT: sllw a6, a6, a0 -; RV64IA-NEXT: and a3, a3, a4 -; RV64IA-NEXT: or a6, a3, a6 +; RV64IA-NEXT: and a4, a4, a3 +; RV64IA-NEXT: or a6, a4, a6 ; RV64IA-NEXT: .LBB0_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB0_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NEXT: bne a3, a7, .LBB0_1 +; RV64IA-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-NEXT: bne a4, a7, .LBB0_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB0_3 Depth=2 ; RV64IA-NEXT: sc.w.rl t0, a6, (a2) ; RV64IA-NEXT: bnez t0, .LBB0_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end -; RV64IA-NEXT: srlw a0, a3, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: ret %result = atomicrmw usub_cond ptr %ptr, i8 %val seq_cst ret i8 %result @@ -200,27 +200,27 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) { ; RV32I-NEXT: .cfi_offset s3, -20 ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: .LBB1_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: sltu a0, a0, s3 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a0, a0, s0 -; RV32I-NEXT: sub a2, a1, a0 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: and a1, a0, s2 +; RV32I-NEXT: sltu a1, a1, s3 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a1, a1, s0 +; RV32I-NEXT: sub a2, a0, a1 +; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) -; RV32I-NEXT: beqz a0, .LBB1_1 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 10(sp) +; RV32I-NEXT: beqz a1, .LBB1_1 ; RV32I-NEXT: # %bb.2: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -242,9 +242,9 @@ define i16 
@atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) { ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: andi a0, a4, 24 ; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: lw a6, 0(a2) ; RV32IA-NEXT: sll a4, a3, a4 ; RV32IA-NEXT: not a4, a4 +; RV32IA-NEXT: lw a6, 0(a2) ; RV32IA-NEXT: and a5, a1, a3 ; RV32IA-NEXT: .LBB1_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Loop Header: Depth=1 @@ -290,27 +290,27 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) { ; RV64I-NEXT: .cfi_offset s3, -40 ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addiw s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: .LBB1_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: sltu a0, a0, s3 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: and a0, a0, s0 -; RV64I-NEXT: sub a2, a1, a0 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: and a1, a0, s2 +; RV64I-NEXT: sltu a1, a1, s3 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: and a1, a1, s0 +; RV64I-NEXT: sub a2, a0, a1 +; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) -; RV64I-NEXT: beqz a0, .LBB1_1 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 6(sp) +; RV64I-NEXT: beqz a1, .LBB1_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -328,19 +328,19 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) { ; RV64IA-LABEL: atomicrmw_usub_cond_i16: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a5, a0, 3 +; RV64IA-NEXT: slli a4, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: andi a0, a5, 24 +; RV64IA-NEXT: andi a0, a4, 24 ; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: lw a4, 0(a2) -; RV64IA-NEXT: sllw a5, a3, a5 -; RV64IA-NEXT: not a5, a5 +; RV64IA-NEXT: sllw a4, a3, a4 +; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: lw a5, 0(a2) ; RV64IA-NEXT: and a6, a1, a3 ; RV64IA-NEXT: .LBB1_1: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB1_3 Depth 2 -; RV64IA-NEXT: srlw a7, a4, a0 -; RV64IA-NEXT: sext.w t0, a4 +; RV64IA-NEXT: srlw a7, a5, a0 +; RV64IA-NEXT: sext.w t0, a5 ; RV64IA-NEXT: and t1, a7, a3 ; RV64IA-NEXT: sltu t1, t1, a6 ; RV64IA-NEXT: addi t1, t1, -1 @@ -348,20 +348,20 @@ define i16 @atomicrmw_usub_cond_i16(ptr %ptr, i16 %val) { ; RV64IA-NEXT: subw a7, a7, t1 ; RV64IA-NEXT: and a7, a7, a3 ; RV64IA-NEXT: sllw a7, a7, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: or a7, a4, a7 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: or a7, a5, a7 ; RV64IA-NEXT: .LBB1_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB1_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a4, (a2) -; RV64IA-NEXT: bne a4, t0, .LBB1_1 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: bne a5, t0, .LBB1_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB1_3 Depth=2 ; RV64IA-NEXT: sc.w.rl t1, a7, (a2) ; RV64IA-NEXT: bnez t1, .LBB1_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end -; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret %result = atomicrmw usub_cond ptr %ptr, i16 %val seq_cst ret i16 %result @@ -378,25 
+378,25 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) { ; RV32I-NEXT: .cfi_offset ra, -4 ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: .cfi_offset s1, -12 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: .LBB2_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sltu a0, a3, s1 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a0, a0, s1 -; RV32I-NEXT: sub a2, a3, a0 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sltu a1, a0, s0 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a1, a1, s0 +; RV32I-NEXT: sub a2, a0, a1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: beqz a0, .LBB2_1 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a1, .LBB2_1 ; RV32I-NEXT: # %bb.2: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -444,26 +444,26 @@ define i32 @atomicrmw_usub_cond_i32(ptr %ptr, i32 %val) { ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, -24 ; RV64I-NEXT: .cfi_offset s2, -32 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: .LBB2_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sltu a0, a3, s2 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: and a0, a0, s1 -; RV64I-NEXT: subw a2, a3, a0 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sltu a1, a0, s2 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: and a1, a1, s0 +; RV64I-NEXT: subw a2, a0, a1 +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: beqz a0, .LBB2_1 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: beqz a1, .LBB2_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -519,43 +519,42 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB3_3 ; RV32I-NEXT: .LBB3_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: sltu a0, a5, s0 +; RV32I-NEXT: sltu a2, a1, s0 ; RV32I-NEXT: .LBB3_2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: xori a0, a0, 1 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: and a1, a0, s2 -; RV32I-NEXT: and a0, a0, s0 -; RV32I-NEXT: sltu a3, a4, a1 -; RV32I-NEXT: sub a0, a5, a0 -; RV32I-NEXT: sub a2, a4, a1 -; RV32I-NEXT: sub a3, a0, a3 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: xori a2, a2, 1 +; RV32I-NEXT: neg a2, a2 +; 
RV32I-NEXT: and a3, a2, s1 +; RV32I-NEXT: and a2, a2, s0 +; RV32I-NEXT: sltu a4, a0, a3 +; RV32I-NEXT: sub a5, a1, a2 +; RV32I-NEXT: sub a2, a0, a3 +; RV32I-NEXT: sub a3, a5, a4 +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB3_5 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB3_5 ; RV32I-NEXT: .LBB3_3: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: bne a5, s0, .LBB3_1 +; RV32I-NEXT: bne a1, s0, .LBB3_1 ; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: sltu a0, a4, s2 +; RV32I-NEXT: sltu a2, a0, s1 ; RV32I-NEXT: j .LBB3_2 ; RV32I-NEXT: .LBB3_5: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -581,43 +580,42 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: .cfi_offset s1, -12 ; RV32IA-NEXT: .cfi_offset s2, -16 ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB3_3 ; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: sltu a0, a5, s0 +; RV32IA-NEXT: sltu a2, a1, s0 ; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: xori a0, a0, 1 -; RV32IA-NEXT: neg a0, a0 -; RV32IA-NEXT: and a1, a0, s2 -; RV32IA-NEXT: and a0, a0, s0 -; RV32IA-NEXT: sltu a3, a4, a1 -; RV32IA-NEXT: sub a0, a5, a0 -; RV32IA-NEXT: sub a2, a4, a1 -; RV32IA-NEXT: sub a3, a0, a3 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: xori a2, a2, 1 +; RV32IA-NEXT: neg a2, a2 +; RV32IA-NEXT: and a3, a2, s1 +; RV32IA-NEXT: and a2, a2, s0 +; RV32IA-NEXT: sltu a4, a0, a3 +; RV32IA-NEXT: sub a5, a1, a2 +; RV32IA-NEXT: sub a2, a0, a3 +; RV32IA-NEXT: sub a3, a5, a4 +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB3_5 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB3_5 ; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: bne a5, s0, .LBB3_1 +; RV32IA-NEXT: bne a1, s0, .LBB3_1 ; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: sltu a0, a4, s2 +; RV32IA-NEXT: sltu a2, a0, s1 ; RV32IA-NEXT: j .LBB3_2 ; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -640,25 +638,25 @@ define i64 @atomicrmw_usub_cond_i64(ptr %ptr, i64 %val) { ; RV64I-NEXT: .cfi_offset ra, -8 ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, 
-24 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: .LBB3_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sltu a0, a3, s1 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: and a0, a0, s1 -; RV64I-NEXT: sub a2, a3, a0 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sltu a1, a0, s0 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: and a1, a1, s0 +; RV64I-NEXT: sub a2, a0, a1 +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: beqz a0, .LBB3_1 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: beqz a1, .LBB3_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -709,25 +707,25 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) { ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s1, a1, 255 ; RV32I-NEXT: .LBB4_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: sub a1, a0, s1 -; RV32I-NEXT: sltu a0, a0, a1 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a2, a0, a1 -; RV32I-NEXT: sb a3, 3(sp) +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: sub a2, a1, s1 +; RV32I-NEXT: sltu a1, a1, a2 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a2, a1, a2 +; RV32I-NEXT: sb a0, 3(sp) ; RV32I-NEXT: addi a1, sp, 3 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 3(sp) -; RV32I-NEXT: beqz a0, .LBB4_1 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 3(sp) +; RV32I-NEXT: beqz a1, .LBB4_1 ; RV32I-NEXT: # %bb.2: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -741,12 +739,12 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) { ; RV32IA-LABEL: atomicrmw_usub_sat_i8: ; RV32IA: # %bb.0: ; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: lw a4, 0(a2) -; RV32IA-NEXT: andi a0, a0, 24 +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: li a4, 255 +; RV32IA-NEXT: andi a0, a3, 24 +; RV32IA-NEXT: sll a3, a4, a3 ; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: lw a4, 0(a2) ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: .LBB4_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Loop Header: Depth=1 @@ -786,25 +784,25 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) { ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, -24 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s1, a1, 255 ; RV64I-NEXT: .LBB4_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: sub a1, a0, s1 -; RV64I-NEXT: sltu a0, a0, a1 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: sb a3, 7(sp) +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: sub a2, a1, s1 +; RV64I-NEXT: sltu a1, a1, a2 +; RV64I-NEXT: 
addi a1, a1, -1 +; RV64I-NEXT: and a2, a1, a2 +; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: addi a1, sp, 7 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 7(sp) -; RV64I-NEXT: beqz a0, .LBB4_1 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 7(sp) +; RV64I-NEXT: beqz a1, .LBB4_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -818,38 +816,38 @@ define i8 @atomicrmw_usub_sat_i8(ptr %ptr, i8 %val) { ; RV64IA-LABEL: atomicrmw_usub_sat_i8: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: lw a3, 0(a2) -; RV64IA-NEXT: andi a0, a0, 24 -; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: li a4, 255 +; RV64IA-NEXT: andi a0, a3, 24 +; RV64IA-NEXT: sllw a3, a4, a3 +; RV64IA-NEXT: not a3, a3 +; RV64IA-NEXT: lw a4, 0(a2) ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: .LBB4_1: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB4_3 Depth 2 -; RV64IA-NEXT: srlw a5, a3, a0 -; RV64IA-NEXT: sext.w a6, a3 +; RV64IA-NEXT: srlw a5, a4, a0 +; RV64IA-NEXT: sext.w a6, a4 ; RV64IA-NEXT: andi a5, a5, 255 ; RV64IA-NEXT: sub a7, a5, a1 ; RV64IA-NEXT: sltu a5, a5, a7 ; RV64IA-NEXT: addi a5, a5, -1 ; RV64IA-NEXT: and a5, a5, a7 ; RV64IA-NEXT: sllw a5, a5, a0 -; RV64IA-NEXT: and a3, a3, a4 -; RV64IA-NEXT: or a5, a3, a5 +; RV64IA-NEXT: and a4, a4, a3 +; RV64IA-NEXT: or a5, a4, a5 ; RV64IA-NEXT: .LBB4_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB4_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NEXT: bne a3, a6, .LBB4_1 +; RV64IA-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-NEXT: bne a4, a6, .LBB4_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB4_3 Depth=2 ; RV64IA-NEXT: sc.w.rl a7, a5, (a2) ; RV64IA-NEXT: bnez a7, .LBB4_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end -; RV64IA-NEXT: srlw a0, a3, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: ret %result = atomicrmw usub_sat ptr %ptr, i8 %val seq_cst ret i8 %result @@ -869,27 +867,27 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) { ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s1, 16 ; RV32I-NEXT: addi s1, s1, -1 ; RV32I-NEXT: and s2, a1, s1 ; RV32I-NEXT: .LBB5_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a3, s1 -; RV32I-NEXT: sub a1, a0, s2 -; RV32I-NEXT: sltu a0, a0, a1 -; RV32I-NEXT: addi a0, a0, -1 -; RV32I-NEXT: and a2, a0, a1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: and a1, a0, s1 +; RV32I-NEXT: sub a2, a1, s2 +; RV32I-NEXT: sltu a1, a1, a2 +; RV32I-NEXT: addi a1, a1, -1 +; RV32I-NEXT: and a2, a1, a2 +; RV32I-NEXT: sh a0, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) -; RV32I-NEXT: beqz a0, .LBB5_1 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 14(sp) +; RV32I-NEXT: beqz a1, .LBB5_1 ; RV32I-NEXT: # %bb.2: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; 
RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -909,9 +907,9 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) { ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: andi a0, a4, 24 ; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: lw a5, 0(a2) ; RV32IA-NEXT: sll a4, a3, a4 ; RV32IA-NEXT: not a4, a4 +; RV32IA-NEXT: lw a5, 0(a2) ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: .LBB5_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Loop Header: Depth=1 @@ -953,27 +951,27 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) { ; RV64I-NEXT: .cfi_offset s1, -24 ; RV64I-NEXT: .cfi_offset s2, -32 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s1, 16 ; RV64I-NEXT: addiw s1, s1, -1 ; RV64I-NEXT: and s2, a1, s1 ; RV64I-NEXT: .LBB5_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a3, s1 -; RV64I-NEXT: sub a1, a0, s2 -; RV64I-NEXT: sltu a0, a0, a1 -; RV64I-NEXT: addi a0, a0, -1 -; RV64I-NEXT: and a2, a0, a1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: and a1, a0, s1 +; RV64I-NEXT: sub a2, a1, s2 +; RV64I-NEXT: sltu a1, a1, a2 +; RV64I-NEXT: addi a1, a1, -1 +; RV64I-NEXT: and a2, a1, a2 +; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: beqz a0, .LBB5_1 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 14(sp) +; RV64I-NEXT: beqz a1, .LBB5_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -989,39 +987,39 @@ define i16 @atomicrmw_usub_sat_i16(ptr %ptr, i16 %val) { ; RV64IA-LABEL: atomicrmw_usub_sat_i16: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a5, a0, 3 +; RV64IA-NEXT: slli a4, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: andi a0, a5, 24 +; RV64IA-NEXT: andi a0, a4, 24 ; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: lw a4, 0(a2) -; RV64IA-NEXT: sllw a5, a3, a5 -; RV64IA-NEXT: not a5, a5 +; RV64IA-NEXT: sllw a4, a3, a4 +; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: lw a5, 0(a2) ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: .LBB5_1: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB5_3 Depth 2 -; RV64IA-NEXT: srlw a6, a4, a0 -; RV64IA-NEXT: sext.w a7, a4 +; RV64IA-NEXT: srlw a6, a5, a0 +; RV64IA-NEXT: sext.w a7, a5 ; RV64IA-NEXT: and a6, a6, a3 ; RV64IA-NEXT: sub t0, a6, a1 ; RV64IA-NEXT: sltu a6, a6, t0 ; RV64IA-NEXT: addi a6, a6, -1 ; RV64IA-NEXT: and a6, a6, t0 ; RV64IA-NEXT: sllw a6, a6, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: or a6, a4, a6 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: or a6, a5, a6 ; RV64IA-NEXT: .LBB5_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB5_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a4, (a2) -; RV64IA-NEXT: bne a4, a7, .LBB5_1 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: bne a5, a7, .LBB5_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB5_3 Depth=2 ; RV64IA-NEXT: sc.w.rl t0, a6, (a2) ; RV64IA-NEXT: bnez t0, .LBB5_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end -; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret %result = atomicrmw usub_sat ptr %ptr, i16 %val 
seq_cst ret i16 %result @@ -1038,25 +1036,25 @@ define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) { ; RV32I-NEXT: .cfi_offset ra, -4 ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: .cfi_offset s1, -12 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: .LBB6_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sub a0, a3, s1 -; RV32I-NEXT: sltu a1, a3, a0 -; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: and a2, a1, a0 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sub a1, a0, s0 +; RV32I-NEXT: sltu a2, a0, a1 +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: and a2, a2, a1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: beqz a0, .LBB6_1 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a1, .LBB6_1 ; RV32I-NEXT: # %bb.2: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -1102,25 +1100,25 @@ define i32 @atomicrmw_usub_sat_i32(ptr %ptr, i32 %val) { ; RV64I-NEXT: .cfi_offset ra, -8 ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, -24 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: .LBB6_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: subw a0, a3, s1 -; RV64I-NEXT: sltu a1, a3, a0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: and a2, a1, a0 -; RV64I-NEXT: sw a3, 4(sp) +; RV64I-NEXT: subw a1, a0, s0 +; RV64I-NEXT: sltu a2, a0, a1 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: and a2, a2, a1 +; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: addi a1, sp, 4 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 4(sp) -; RV64I-NEXT: beqz a0, .LBB6_1 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 4(sp) +; RV64I-NEXT: beqz a1, .LBB6_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -1173,42 +1171,41 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB7_3 ; RV32I-NEXT: .LBB7_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_3 Depth=1 -; RV32I-NEXT: sltu a2, a5, a0 +; RV32I-NEXT: sltu a4, a1, a3 ; RV32I-NEXT: .LBB7_2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_3 Depth=1 -; RV32I-NEXT: addi a3, a2, -1 -; RV32I-NEXT: and a2, a3, a1 -; RV32I-NEXT: and a3, a3, a0 -; RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: addi a4, a4, -1 +; RV32I-NEXT: and a2, a4, a2 +; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; 
RV32I-NEXT: li a5, 5 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB7_5 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB7_5 ; RV32I-NEXT: .LBB7_3: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: sltu a0, a4, s2 -; RV32I-NEXT: sub a1, a5, s0 -; RV32I-NEXT: sub a0, a1, a0 -; RV32I-NEXT: sub a1, a4, s2 -; RV32I-NEXT: bne a0, a5, .LBB7_1 +; RV32I-NEXT: sltu a2, a0, s1 +; RV32I-NEXT: sub a3, a1, s0 +; RV32I-NEXT: sub a3, a3, a2 +; RV32I-NEXT: sub a2, a0, s1 +; RV32I-NEXT: bne a3, a1, .LBB7_1 ; RV32I-NEXT: # %bb.4: # in Loop: Header=BB7_3 Depth=1 -; RV32I-NEXT: sltu a2, a4, a1 +; RV32I-NEXT: sltu a4, a0, a2 ; RV32I-NEXT: j .LBB7_2 ; RV32I-NEXT: .LBB7_5: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1234,42 +1231,41 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: .cfi_offset s1, -12 ; RV32IA-NEXT: .cfi_offset s2, -16 ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB7_3 ; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_3 Depth=1 -; RV32IA-NEXT: sltu a2, a5, a0 +; RV32IA-NEXT: sltu a4, a1, a3 ; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_3 Depth=1 -; RV32IA-NEXT: addi a3, a2, -1 -; RV32IA-NEXT: and a2, a3, a1 -; RV32IA-NEXT: and a3, a3, a0 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: addi a4, a4, -1 +; RV32IA-NEXT: and a2, a4, a2 +; RV32IA-NEXT: and a3, a4, a3 +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB7_5 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB7_5 ; RV32IA-NEXT: .LBB7_3: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: sltu a0, a4, s2 -; RV32IA-NEXT: sub a1, a5, s0 -; RV32IA-NEXT: sub a0, a1, a0 -; RV32IA-NEXT: sub a1, a4, s2 -; RV32IA-NEXT: bne a0, a5, .LBB7_1 +; RV32IA-NEXT: sltu a2, a0, s1 +; RV32IA-NEXT: sub a3, a1, s0 +; RV32IA-NEXT: sub a3, a3, a2 +; RV32IA-NEXT: sub a2, a0, s1 +; RV32IA-NEXT: bne a3, a1, .LBB7_1 ; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB7_3 Depth=1 -; RV32IA-NEXT: sltu a2, a4, a1 +; RV32IA-NEXT: sltu a4, a0, a2 ; RV32IA-NEXT: j .LBB7_2 ; RV32IA-NEXT: .LBB7_5: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1292,25 +1288,25 @@ define i64 @atomicrmw_usub_sat_i64(ptr %ptr, i64 %val) { ; RV64I-NEXT: .cfi_offset ra, -8 ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, -24 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: 
mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: .LBB7_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: sub a0, a3, s1 -; RV64I-NEXT: sltu a1, a3, a0 -; RV64I-NEXT: addi a1, a1, -1 -; RV64I-NEXT: and a2, a1, a0 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sub a1, a0, s0 +; RV64I-NEXT: sltu a2, a0, a1 +; RV64I-NEXT: addi a2, a2, -1 +; RV64I-NEXT: and a2, a2, a1 +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: beqz a0, .LBB7_1 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: beqz a1, .LBB7_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll index 3ff01e4987bd5..d67e047e8b05b 100644 --- a/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll +++ b/llvm/test/CodeGen/RISCV/atomicrmw-uinc-udec-wrap.ll @@ -25,25 +25,25 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s1, a1, 255 ; RV32I-NEXT: .LBB0_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: addi a0, a3, 1 -; RV32I-NEXT: andi a1, a3, 255 -; RV32I-NEXT: sltu a1, a1, s1 -; RV32I-NEXT: neg a2, a1 -; RV32I-NEXT: and a2, a2, a0 -; RV32I-NEXT: sb a3, 3(sp) +; RV32I-NEXT: addi a1, a0, 1 +; RV32I-NEXT: andi a2, a0, 255 +; RV32I-NEXT: sltu a2, a2, s1 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: and a2, a2, a1 +; RV32I-NEXT: sb a0, 3(sp) ; RV32I-NEXT: addi a1, sp, 3 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 3(sp) -; RV32I-NEXT: beqz a0, .LBB0_1 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 3(sp) +; RV32I-NEXT: beqz a1, .LBB0_1 ; RV32I-NEXT: # %bb.2: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -57,12 +57,12 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV32IA-LABEL: atomicrmw_uinc_wrap_i8: ; RV32IA: # %bb.0: ; RV32IA-NEXT: andi a2, a0, -4 -; RV32IA-NEXT: slli a0, a0, 3 -; RV32IA-NEXT: li a3, 255 -; RV32IA-NEXT: sll a3, a3, a0 -; RV32IA-NEXT: lw a4, 0(a2) -; RV32IA-NEXT: andi a0, a0, 24 +; RV32IA-NEXT: slli a3, a0, 3 +; RV32IA-NEXT: li a4, 255 +; RV32IA-NEXT: andi a0, a3, 24 +; RV32IA-NEXT: sll a3, a4, a3 ; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: lw a4, 0(a2) ; RV32IA-NEXT: andi a1, a1, 255 ; RV32IA-NEXT: .LBB0_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Loop Header: Depth=1 @@ -103,25 +103,25 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, -24 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s1, a1, 255 ; RV64I-NEXT: .LBB0_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: addi a0, a3, 1 -; RV64I-NEXT: andi a1, a3, 255 -; RV64I-NEXT: sltu a1, a1, s1 -; RV64I-NEXT: neg a2, a1 -; 
RV64I-NEXT: and a2, a2, a0 -; RV64I-NEXT: sb a3, 7(sp) +; RV64I-NEXT: addi a1, a0, 1 +; RV64I-NEXT: andi a2, a0, 255 +; RV64I-NEXT: sltu a2, a2, s1 +; RV64I-NEXT: neg a2, a2 +; RV64I-NEXT: and a2, a2, a1 +; RV64I-NEXT: sb a0, 7(sp) ; RV64I-NEXT: addi a1, sp, 7 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 7(sp) -; RV64I-NEXT: beqz a0, .LBB0_1 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 7(sp) +; RV64I-NEXT: beqz a1, .LBB0_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -135,18 +135,18 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV64IA-LABEL: atomicrmw_uinc_wrap_i8: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a0, a0, 3 -; RV64IA-NEXT: li a3, 255 -; RV64IA-NEXT: sllw a4, a3, a0 -; RV64IA-NEXT: lw a3, 0(a2) -; RV64IA-NEXT: andi a0, a0, 24 -; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: li a4, 255 +; RV64IA-NEXT: andi a0, a3, 24 +; RV64IA-NEXT: sllw a3, a4, a3 +; RV64IA-NEXT: not a3, a3 +; RV64IA-NEXT: lw a4, 0(a2) ; RV64IA-NEXT: andi a1, a1, 255 ; RV64IA-NEXT: .LBB0_1: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB0_3 Depth 2 -; RV64IA-NEXT: srlw a5, a3, a0 -; RV64IA-NEXT: sext.w a6, a3 +; RV64IA-NEXT: srlw a5, a4, a0 +; RV64IA-NEXT: sext.w a6, a4 ; RV64IA-NEXT: andi a7, a5, 255 ; RV64IA-NEXT: addi a5, a5, 1 ; RV64IA-NEXT: sltu a7, a7, a1 @@ -154,20 +154,20 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) { ; RV64IA-NEXT: and a5, a7, a5 ; RV64IA-NEXT: andi a5, a5, 255 ; RV64IA-NEXT: sllw a5, a5, a0 -; RV64IA-NEXT: and a3, a3, a4 -; RV64IA-NEXT: or a5, a3, a5 +; RV64IA-NEXT: and a4, a4, a3 +; RV64IA-NEXT: or a5, a4, a5 ; RV64IA-NEXT: .LBB0_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB0_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NEXT: bne a3, a6, .LBB0_1 +; RV64IA-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-NEXT: bne a4, a6, .LBB0_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB0_3 Depth=2 ; RV64IA-NEXT: sc.w.rl a7, a5, (a2) ; RV64IA-NEXT: bnez a7, .LBB0_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end -; RV64IA-NEXT: srlw a0, a3, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: ret %result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst ret i8 %result @@ -187,27 +187,27 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lhu a3, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; RV32I-NEXT: lui s1, 16 ; RV32I-NEXT: addi s1, s1, -1 ; RV32I-NEXT: and s2, a1, s1 ; RV32I-NEXT: .LBB1_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a3, s1 -; RV32I-NEXT: addi a1, a3, 1 -; RV32I-NEXT: sltu a0, a0, s2 -; RV32I-NEXT: neg a2, a0 -; RV32I-NEXT: and a2, a2, a1 -; RV32I-NEXT: sh a3, 14(sp) +; RV32I-NEXT: and a1, a0, s1 +; RV32I-NEXT: addi a2, a0, 1 +; RV32I-NEXT: sltu a1, a1, s2 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: and a2, a1, a2 +; RV32I-NEXT: sh a0, 14(sp) ; RV32I-NEXT: addi a1, sp, 14 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a3, 14(sp) -; 
RV32I-NEXT: beqz a0, .LBB1_1 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 14(sp) +; RV32I-NEXT: beqz a1, .LBB1_1 ; RV32I-NEXT: # %bb.2: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -227,9 +227,9 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: andi a0, a4, 24 ; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: lw a5, 0(a2) ; RV32IA-NEXT: sll a4, a3, a4 ; RV32IA-NEXT: not a4, a4 +; RV32IA-NEXT: lw a5, 0(a2) ; RV32IA-NEXT: and a1, a1, a3 ; RV32IA-NEXT: .LBB1_1: # %atomicrmw.start ; RV32IA-NEXT: # =>This Loop Header: Depth=1 @@ -272,27 +272,27 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV64I-NEXT: .cfi_offset s1, -24 ; RV64I-NEXT: .cfi_offset s2, -32 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lhu a3, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s1, 16 ; RV64I-NEXT: addiw s1, s1, -1 ; RV64I-NEXT: and s2, a1, s1 ; RV64I-NEXT: .LBB1_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a3, s1 -; RV64I-NEXT: addi a1, a3, 1 -; RV64I-NEXT: sltu a0, a0, s2 -; RV64I-NEXT: neg a2, a0 -; RV64I-NEXT: and a2, a2, a1 -; RV64I-NEXT: sh a3, 14(sp) +; RV64I-NEXT: and a1, a0, s1 +; RV64I-NEXT: addi a2, a0, 1 +; RV64I-NEXT: sltu a1, a1, s2 +; RV64I-NEXT: neg a1, a1 +; RV64I-NEXT: and a2, a1, a2 +; RV64I-NEXT: sh a0, 14(sp) ; RV64I-NEXT: addi a1, sp, 14 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a3, 14(sp) -; RV64I-NEXT: beqz a0, .LBB1_1 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 14(sp) +; RV64I-NEXT: beqz a1, .LBB1_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -308,19 +308,19 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV64IA-LABEL: atomicrmw_uinc_wrap_i16: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a5, a0, 3 +; RV64IA-NEXT: slli a4, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: andi a0, a5, 24 +; RV64IA-NEXT: andi a0, a4, 24 ; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: lw a4, 0(a2) -; RV64IA-NEXT: sllw a5, a3, a5 -; RV64IA-NEXT: not a5, a5 +; RV64IA-NEXT: sllw a4, a3, a4 +; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: lw a5, 0(a2) ; RV64IA-NEXT: and a1, a1, a3 ; RV64IA-NEXT: .LBB1_1: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB1_3 Depth 2 -; RV64IA-NEXT: srlw a6, a4, a0 -; RV64IA-NEXT: sext.w a7, a4 +; RV64IA-NEXT: srlw a6, a5, a0 +; RV64IA-NEXT: sext.w a7, a5 ; RV64IA-NEXT: and t0, a6, a3 ; RV64IA-NEXT: addi a6, a6, 1 ; RV64IA-NEXT: sltu t0, t0, a1 @@ -328,20 +328,20 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) { ; RV64IA-NEXT: negw t0, t0 ; RV64IA-NEXT: and a6, t0, a6 ; RV64IA-NEXT: sllw a6, a6, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: or a6, a4, a6 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: or a6, a5, a6 ; RV64IA-NEXT: .LBB1_3: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB1_1 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a4, (a2) -; RV64IA-NEXT: bne a4, a7, .LBB1_1 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: bne a5, a7, .LBB1_1 ; RV64IA-NEXT: # %bb.4: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: 
Header=BB1_3 Depth=2 ; RV64IA-NEXT: sc.w.rl t0, a6, (a2) ; RV64IA-NEXT: bnez t0, .LBB1_3 ; RV64IA-NEXT: # %bb.5: # %atomicrmw.start ; RV64IA-NEXT: # %bb.2: # %atomicrmw.end -; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret %result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst ret i16 %result @@ -358,25 +358,25 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; RV32I-NEXT: .cfi_offset ra, -4 ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: .cfi_offset s1, -12 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: .LBB2_1: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: addi a0, a3, 1 -; RV32I-NEXT: sltu a1, a3, s1 -; RV32I-NEXT: neg a2, a1 -; RV32I-NEXT: and a2, a2, a0 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: addi a1, a0, 1 +; RV32I-NEXT: sltu a2, a0, s0 +; RV32I-NEXT: neg a2, a2 +; RV32I-NEXT: and a2, a2, a1 +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: beqz a0, .LBB2_1 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: beqz a1, .LBB2_1 ; RV32I-NEXT: # %bb.2: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -423,24 +423,24 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) { ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, -24 ; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s1, a1 ; RV64I-NEXT: .LBB2_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: addiw a0, a3, 1 -; RV64I-NEXT: sltu a1, a3, s1 -; RV64I-NEXT: neg a2, a1 -; RV64I-NEXT: and a2, a2, a0 -; RV64I-NEXT: sw a3, 4(sp) +; RV64I-NEXT: addiw a1, a0, 1 +; RV64I-NEXT: sltu a2, a0, s1 +; RV64I-NEXT: neg a2, a2 +; RV64I-NEXT: and a2, a2, a1 +; RV64I-NEXT: sw a0, 4(sp) ; RV64I-NEXT: addi a1, sp, 4 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 4(sp) -; RV64I-NEXT: beqz a0, .LBB2_1 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 4(sp) +; RV64I-NEXT: beqz a1, .LBB2_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -494,41 +494,40 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: lw a5, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB3_3 ; RV32I-NEXT: .LBB3_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: sltu a0, a5, s0 +; RV32I-NEXT: sltu a2, a1, s0 ; RV32I-NEXT: .LBB3_2: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: addi a1, a4, 1 -; RV32I-NEXT: neg a0, a0 -; RV32I-NEXT: seqz a3, a1 -; RV32I-NEXT: and a2, a0, a1 -; RV32I-NEXT: add a3, a5, a3 -; RV32I-NEXT: and a3, a0, a3 -; 
RV32I-NEXT: sw a4, 8(sp) -; RV32I-NEXT: sw a5, 12(sp) +; RV32I-NEXT: addi a3, a0, 1 +; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: seqz a5, a3 +; RV32I-NEXT: and a2, a4, a3 +; RV32I-NEXT: add a3, a1, a5 +; RV32I-NEXT: and a3, a4, a3 +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a4, 8(sp) -; RV32I-NEXT: lw a5, 12(sp) -; RV32I-NEXT: bnez a0, .LBB3_5 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB3_5 ; RV32I-NEXT: .LBB3_3: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: bne a5, s0, .LBB3_1 +; RV32I-NEXT: bne a1, s0, .LBB3_1 ; RV32I-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 -; RV32I-NEXT: sltu a0, a4, s2 +; RV32I-NEXT: sltu a2, a0, s1 ; RV32I-NEXT: j .LBB3_2 ; RV32I-NEXT: .LBB3_5: # %atomicrmw.end -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -554,41 +553,40 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: .cfi_offset s1, -12 ; RV32IA-NEXT: .cfi_offset s2, -16 ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a4, 0(a0) -; RV32IA-NEXT: lw a5, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB3_3 ; RV32IA-NEXT: .LBB3_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: sltu a0, a5, s0 +; RV32IA-NEXT: sltu a2, a1, s0 ; RV32IA-NEXT: .LBB3_2: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: addi a1, a4, 1 -; RV32IA-NEXT: neg a0, a0 -; RV32IA-NEXT: seqz a3, a1 -; RV32IA-NEXT: and a2, a0, a1 -; RV32IA-NEXT: add a3, a5, a3 -; RV32IA-NEXT: and a3, a0, a3 -; RV32IA-NEXT: sw a4, 8(sp) -; RV32IA-NEXT: sw a5, 12(sp) +; RV32IA-NEXT: addi a3, a0, 1 +; RV32IA-NEXT: neg a4, a2 +; RV32IA-NEXT: seqz a5, a3 +; RV32IA-NEXT: and a2, a4, a3 +; RV32IA-NEXT: add a3, a1, a5 +; RV32IA-NEXT: and a3, a4, a3 +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a4, 8(sp) -; RV32IA-NEXT: lw a5, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB3_5 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB3_5 ; RV32IA-NEXT: .LBB3_3: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: bne a5, s0, .LBB3_1 +; RV32IA-NEXT: bne a1, s0, .LBB3_1 ; RV32IA-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 -; RV32IA-NEXT: sltu a0, a4, s2 +; RV32IA-NEXT: sltu a2, a0, s1 ; RV32IA-NEXT: j .LBB3_2 ; RV32IA-NEXT: .LBB3_5: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a4 -; RV32IA-NEXT: mv a1, a5 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -611,25 +609,25 @@ define i64 @atomicrmw_uinc_wrap_i64(ptr %ptr, i64 %val) { ; RV64I-NEXT: .cfi_offset ra, -8 ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, -24 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; 
RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: .LBB3_1: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: addi a0, a3, 1 -; RV64I-NEXT: sltu a1, a3, s1 -; RV64I-NEXT: neg a2, a1 -; RV64I-NEXT: and a2, a2, a0 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: addi a1, a0, 1 +; RV64I-NEXT: sltu a2, a0, s0 +; RV64I-NEXT: neg a2, a2 +; RV64I-NEXT: and a2, a2, a1 +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: beqz a0, .LBB3_1 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: beqz a1, .LBB3_1 ; RV64I-NEXT: # %bb.2: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -681,35 +679,35 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lbu a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lbu a0, 0(a0) ; RV32I-NEXT: andi s2, a1, 255 ; RV32I-NEXT: j .LBB4_2 ; RV32I-NEXT: .LBB4_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB4_2 Depth=1 -; RV32I-NEXT: sb a3, 15(sp) +; RV32I-NEXT: sb a0, 15(sp) ; RV32I-NEXT: addi a1, sp, 15 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_1 -; RV32I-NEXT: lbu a3, 15(sp) -; RV32I-NEXT: bnez a0, .LBB4_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lbu a0, 15(sp) +; RV32I-NEXT: bnez a1, .LBB4_4 ; RV32I-NEXT: .LBB4_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: andi a0, a3, 255 -; RV32I-NEXT: seqz a1, a0 -; RV32I-NEXT: sltu a0, s2, a0 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: bnez a0, .LBB4_1 +; RV32I-NEXT: andi a1, a0, 255 +; RV32I-NEXT: seqz a2, a1 +; RV32I-NEXT: sltu a1, s2, a1 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: bnez a1, .LBB4_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB4_2 Depth=1 -; RV32I-NEXT: addi a2, a3, -1 +; RV32I-NEXT: addi a2, a0, -1 ; RV32I-NEXT: j .LBB4_1 ; RV32I-NEXT: .LBB4_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -728,9 +726,9 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; RV32IA-NEXT: slli a3, a0, 3 ; RV32IA-NEXT: li a4, 255 ; RV32IA-NEXT: andi a0, a3, 24 -; RV32IA-NEXT: lw a6, 0(a2) ; RV32IA-NEXT: sll a3, a4, a3 ; RV32IA-NEXT: not a3, a3 +; RV32IA-NEXT: lw a6, 0(a2) ; RV32IA-NEXT: andi a4, a1, 255 ; RV32IA-NEXT: j .LBB4_2 ; RV32IA-NEXT: .LBB4_1: # %atomicrmw.start @@ -782,35 +780,35 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, -24 ; RV64I-NEXT: .cfi_offset s2, -32 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lbu a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lbu a0, 0(a0) ; RV64I-NEXT: andi s2, a1, 255 ; RV64I-NEXT: j .LBB4_2 ; RV64I-NEXT: .LBB4_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB4_2 Depth=1 -; RV64I-NEXT: sb 
a3, 15(sp) +; RV64I-NEXT: sb a0, 15(sp) ; RV64I-NEXT: addi a1, sp, 15 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_1 -; RV64I-NEXT: lbu a3, 15(sp) -; RV64I-NEXT: bnez a0, .LBB4_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lbu a0, 15(sp) +; RV64I-NEXT: bnez a1, .LBB4_4 ; RV64I-NEXT: .LBB4_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: andi a0, a3, 255 -; RV64I-NEXT: seqz a1, a0 -; RV64I-NEXT: sltu a0, s2, a0 -; RV64I-NEXT: or a0, a1, a0 -; RV64I-NEXT: mv a2, s1 -; RV64I-NEXT: bnez a0, .LBB4_1 +; RV64I-NEXT: andi a1, a0, 255 +; RV64I-NEXT: seqz a2, a1 +; RV64I-NEXT: sltu a1, s2, a1 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: bnez a1, .LBB4_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB4_2 Depth=1 -; RV64I-NEXT: addi a2, a3, -1 +; RV64I-NEXT: addi a2, a0, -1 ; RV64I-NEXT: j .LBB4_1 ; RV64I-NEXT: .LBB4_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -826,37 +824,37 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; RV64IA-LABEL: atomicrmw_udec_wrap_i8: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: slli a4, a0, 3 -; RV64IA-NEXT: li a5, 255 -; RV64IA-NEXT: andi a0, a4, 24 -; RV64IA-NEXT: lw a3, 0(a2) -; RV64IA-NEXT: sllw a4, a5, a4 -; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: slli a3, a0, 3 +; RV64IA-NEXT: li a4, 255 +; RV64IA-NEXT: andi a0, a3, 24 +; RV64IA-NEXT: sllw a3, a4, a3 +; RV64IA-NEXT: not a3, a3 +; RV64IA-NEXT: lw a4, 0(a2) ; RV64IA-NEXT: andi a5, a1, 255 ; RV64IA-NEXT: j .LBB4_2 ; RV64IA-NEXT: .LBB4_1: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB4_2 Depth=1 -; RV64IA-NEXT: sext.w a6, a3 +; RV64IA-NEXT: sext.w a6, a4 ; RV64IA-NEXT: andi a7, a7, 255 ; RV64IA-NEXT: sllw a7, a7, a0 -; RV64IA-NEXT: and a3, a3, a4 -; RV64IA-NEXT: or a7, a3, a7 +; RV64IA-NEXT: and a4, a4, a3 +; RV64IA-NEXT: or a7, a4, a7 ; RV64IA-NEXT: .LBB4_5: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB4_2 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a3, (a2) -; RV64IA-NEXT: bne a3, a6, .LBB4_7 +; RV64IA-NEXT: lr.w.aqrl a4, (a2) +; RV64IA-NEXT: bne a4, a6, .LBB4_7 ; RV64IA-NEXT: # %bb.6: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB4_5 Depth=2 ; RV64IA-NEXT: sc.w.rl t0, a7, (a2) ; RV64IA-NEXT: bnez t0, .LBB4_5 ; RV64IA-NEXT: .LBB4_7: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB4_2 Depth=1 -; RV64IA-NEXT: beq a3, a6, .LBB4_4 +; RV64IA-NEXT: beq a4, a6, .LBB4_4 ; RV64IA-NEXT: .LBB4_2: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB4_5 Depth 2 -; RV64IA-NEXT: srlw a6, a3, a0 +; RV64IA-NEXT: srlw a6, a4, a0 ; RV64IA-NEXT: andi a7, a6, 255 ; RV64IA-NEXT: seqz t0, a7 ; RV64IA-NEXT: sltu a7, a5, a7 @@ -868,7 +866,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) { ; RV64IA-NEXT: addi a7, a6, -1 ; RV64IA-NEXT: j .LBB4_1 ; RV64IA-NEXT: .LBB4_4: # %atomicrmw.end -; RV64IA-NEXT: srlw a0, a3, a0 +; RV64IA-NEXT: srlw a0, a4, a0 ; RV64IA-NEXT: ret %result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst ret i8 %result @@ -891,35 +889,35 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; RV32I-NEXT: .cfi_offset s3, -20 ; RV32I-NEXT: mv s0, a1 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lhu a1, 0(a0) +; RV32I-NEXT: lhu a0, 0(a0) ; 
RV32I-NEXT: lui s2, 16 ; RV32I-NEXT: addi s2, s2, -1 -; RV32I-NEXT: and s3, s0, s2 +; RV32I-NEXT: and s3, a1, s2 ; RV32I-NEXT: j .LBB5_2 ; RV32I-NEXT: .LBB5_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB5_2 Depth=1 -; RV32I-NEXT: sh a1, 10(sp) +; RV32I-NEXT: sh a0, 10(sp) ; RV32I-NEXT: addi a1, sp, 10 ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_2 -; RV32I-NEXT: lh a1, 10(sp) -; RV32I-NEXT: bnez a0, .LBB5_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lh a0, 10(sp) +; RV32I-NEXT: bnez a1, .LBB5_4 ; RV32I-NEXT: .LBB5_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: and a0, a1, s2 -; RV32I-NEXT: seqz a2, a0 -; RV32I-NEXT: sltu a0, s3, a0 -; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: and a1, a0, s2 +; RV32I-NEXT: seqz a2, a1 +; RV32I-NEXT: sltu a1, s3, a1 +; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: mv a2, s0 -; RV32I-NEXT: bnez a0, .LBB5_1 +; RV32I-NEXT: bnez a1, .LBB5_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB5_2 Depth=1 -; RV32I-NEXT: addi a2, a1, -1 +; RV32I-NEXT: addi a2, a0, -1 ; RV32I-NEXT: j .LBB5_1 ; RV32I-NEXT: .LBB5_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -941,9 +939,9 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; RV32IA-NEXT: lui a3, 16 ; RV32IA-NEXT: andi a0, a4, 24 ; RV32IA-NEXT: addi a3, a3, -1 -; RV32IA-NEXT: lw a7, 0(a2) ; RV32IA-NEXT: sll a4, a3, a4 ; RV32IA-NEXT: not a4, a4 +; RV32IA-NEXT: lw a7, 0(a2) ; RV32IA-NEXT: and a5, a1, a3 ; RV32IA-NEXT: j .LBB5_2 ; RV32IA-NEXT: .LBB5_1: # %atomicrmw.start @@ -999,35 +997,35 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; RV64I-NEXT: .cfi_offset s3, -40 ; RV64I-NEXT: mv s0, a1 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: lhu a1, 0(a0) +; RV64I-NEXT: lhu a0, 0(a0) ; RV64I-NEXT: lui s2, 16 ; RV64I-NEXT: addiw s2, s2, -1 -; RV64I-NEXT: and s3, s0, s2 +; RV64I-NEXT: and s3, a1, s2 ; RV64I-NEXT: j .LBB5_2 ; RV64I-NEXT: .LBB5_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB5_2 Depth=1 -; RV64I-NEXT: sh a1, 6(sp) +; RV64I-NEXT: sh a0, 6(sp) ; RV64I-NEXT: addi a1, sp, 6 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_2 -; RV64I-NEXT: lh a1, 6(sp) -; RV64I-NEXT: bnez a0, .LBB5_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lh a0, 6(sp) +; RV64I-NEXT: bnez a1, .LBB5_4 ; RV64I-NEXT: .LBB5_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: and a0, a1, s2 -; RV64I-NEXT: seqz a2, a0 -; RV64I-NEXT: sltu a0, s3, a0 -; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: and a1, a0, s2 +; RV64I-NEXT: seqz a2, a1 +; RV64I-NEXT: sltu a1, s3, a1 +; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: mv a2, s0 -; RV64I-NEXT: bnez a0, .LBB5_1 +; RV64I-NEXT: bnez a1, .LBB5_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB5_2 Depth=1 -; RV64I-NEXT: addi a2, a1, -1 +; RV64I-NEXT: addi a2, a0, -1 ; RV64I-NEXT: j .LBB5_1 ; RV64I-NEXT: .LBB5_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1045,38 +1043,38 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; RV64IA-LABEL: atomicrmw_udec_wrap_i16: ; RV64IA: # %bb.0: ; RV64IA-NEXT: andi a2, a0, -4 -; RV64IA-NEXT: 
slli a5, a0, 3 +; RV64IA-NEXT: slli a4, a0, 3 ; RV64IA-NEXT: lui a3, 16 -; RV64IA-NEXT: andi a0, a5, 24 +; RV64IA-NEXT: andi a0, a4, 24 ; RV64IA-NEXT: addiw a3, a3, -1 -; RV64IA-NEXT: lw a4, 0(a2) -; RV64IA-NEXT: sllw a5, a3, a5 -; RV64IA-NEXT: not a5, a5 +; RV64IA-NEXT: sllw a4, a3, a4 +; RV64IA-NEXT: not a4, a4 +; RV64IA-NEXT: lw a5, 0(a2) ; RV64IA-NEXT: and a6, a1, a3 ; RV64IA-NEXT: j .LBB5_2 ; RV64IA-NEXT: .LBB5_1: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB5_2 Depth=1 -; RV64IA-NEXT: sext.w a7, a4 +; RV64IA-NEXT: sext.w a7, a5 ; RV64IA-NEXT: and t0, t0, a3 ; RV64IA-NEXT: sllw t0, t0, a0 -; RV64IA-NEXT: and a4, a4, a5 -; RV64IA-NEXT: or t0, a4, t0 +; RV64IA-NEXT: and a5, a5, a4 +; RV64IA-NEXT: or t0, a5, t0 ; RV64IA-NEXT: .LBB5_5: # %atomicrmw.start ; RV64IA-NEXT: # Parent Loop BB5_2 Depth=1 ; RV64IA-NEXT: # => This Inner Loop Header: Depth=2 -; RV64IA-NEXT: lr.w.aqrl a4, (a2) -; RV64IA-NEXT: bne a4, a7, .LBB5_7 +; RV64IA-NEXT: lr.w.aqrl a5, (a2) +; RV64IA-NEXT: bne a5, a7, .LBB5_7 ; RV64IA-NEXT: # %bb.6: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB5_5 Depth=2 ; RV64IA-NEXT: sc.w.rl t1, t0, (a2) ; RV64IA-NEXT: bnez t1, .LBB5_5 ; RV64IA-NEXT: .LBB5_7: # %atomicrmw.start ; RV64IA-NEXT: # in Loop: Header=BB5_2 Depth=1 -; RV64IA-NEXT: beq a4, a7, .LBB5_4 +; RV64IA-NEXT: beq a5, a7, .LBB5_4 ; RV64IA-NEXT: .LBB5_2: # %atomicrmw.start ; RV64IA-NEXT: # =>This Loop Header: Depth=1 ; RV64IA-NEXT: # Child Loop BB5_5 Depth 2 -; RV64IA-NEXT: srlw a7, a4, a0 +; RV64IA-NEXT: srlw a7, a5, a0 ; RV64IA-NEXT: and t0, a7, a3 ; RV64IA-NEXT: seqz t1, t0 ; RV64IA-NEXT: sltu t0, a6, t0 @@ -1088,7 +1086,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) { ; RV64IA-NEXT: addi t0, a7, -1 ; RV64IA-NEXT: j .LBB5_1 ; RV64IA-NEXT: .LBB5_4: # %atomicrmw.end -; RV64IA-NEXT: srlw a0, a4, a0 +; RV64IA-NEXT: srlw a0, a5, a0 ; RV64IA-NEXT: ret %result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst ret i16 %result @@ -1105,33 +1103,33 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; RV32I-NEXT: .cfi_offset ra, -4 ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: .cfi_offset s1, -12 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: mv s1, a0 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: j .LBB6_2 ; RV32I-NEXT: .LBB6_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB6_2 Depth=1 -; RV32I-NEXT: sw a3, 0(sp) +; RV32I-NEXT: sw a0, 0(sp) ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: li a3, 5 ; RV32I-NEXT: li a4, 5 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __atomic_compare_exchange_4 -; RV32I-NEXT: lw a3, 0(sp) -; RV32I-NEXT: bnez a0, .LBB6_4 +; RV32I-NEXT: mv a1, a0 +; RV32I-NEXT: lw a0, 0(sp) +; RV32I-NEXT: bnez a1, .LBB6_4 ; RV32I-NEXT: .LBB6_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: seqz a0, a3 -; RV32I-NEXT: sltu a1, s1, a3 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: mv a2, s1 -; RV32I-NEXT: bnez a0, .LBB6_1 +; RV32I-NEXT: seqz a1, a0 +; RV32I-NEXT: sltu a2, s0, a0 +; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: mv a2, s0 +; RV32I-NEXT: bnez a1, .LBB6_1 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB6_2 Depth=1 -; RV32I-NEXT: addi a2, a3, -1 +; RV32I-NEXT: addi a2, a0, -1 ; RV32I-NEXT: j .LBB6_1 ; RV32I-NEXT: .LBB6_4: # %atomicrmw.end -; RV32I-NEXT: mv a0, a3 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -1189,34 
+1187,34 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) { ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, -24 ; RV64I-NEXT: .cfi_offset s2, -32 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: lw a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: lw a0, 0(a0) ; RV64I-NEXT: sext.w s2, a1 ; RV64I-NEXT: j .LBB6_2 ; RV64I-NEXT: .LBB6_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB6_2 Depth=1 -; RV64I-NEXT: sw a3, 12(sp) +; RV64I-NEXT: sw a0, 12(sp) ; RV64I-NEXT: addi a1, sp, 12 ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_4 -; RV64I-NEXT: lw a3, 12(sp) -; RV64I-NEXT: bnez a0, .LBB6_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: lw a0, 12(sp) +; RV64I-NEXT: bnez a1, .LBB6_4 ; RV64I-NEXT: .LBB6_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: seqz a0, a3 -; RV64I-NEXT: sltu a1, s2, a3 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: mv a2, s1 -; RV64I-NEXT: bnez a0, .LBB6_1 +; RV64I-NEXT: seqz a1, a0 +; RV64I-NEXT: sltu a2, s2, a0 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: bnez a1, .LBB6_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB6_2 Depth=1 -; RV64I-NEXT: addiw a2, a3, -1 +; RV64I-NEXT: addiw a2, a0, -1 ; RV64I-NEXT: j .LBB6_1 ; RV64I-NEXT: .LBB6_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1282,49 +1280,48 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32I-NEXT: .cfi_offset s1, -12 ; RV32I-NEXT: .cfi_offset s2, -16 ; RV32I-NEXT: mv s0, a2 -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: lw a5, 0(a0) -; RV32I-NEXT: lw a4, 4(a0) -; RV32I-NEXT: mv s2, a1 +; RV32I-NEXT: mv s1, a1 +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: lw a0, 0(a0) +; RV32I-NEXT: lw a1, 4(s2) ; RV32I-NEXT: j .LBB7_2 ; RV32I-NEXT: .LBB7_1: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a4, 12(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 5 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __atomic_compare_exchange_8 -; RV32I-NEXT: lw a5, 8(sp) -; RV32I-NEXT: lw a4, 12(sp) -; RV32I-NEXT: bnez a0, .LBB7_7 +; RV32I-NEXT: mv a2, a0 +; RV32I-NEXT: lw a0, 8(sp) +; RV32I-NEXT: lw a1, 12(sp) +; RV32I-NEXT: bnez a2, .LBB7_7 ; RV32I-NEXT: .LBB7_2: # %atomicrmw.start ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: beq a4, s0, .LBB7_4 +; RV32I-NEXT: beq a1, s0, .LBB7_4 ; RV32I-NEXT: # %bb.3: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: sltu a0, s0, a4 +; RV32I-NEXT: sltu a2, s0, a1 ; RV32I-NEXT: j .LBB7_5 ; RV32I-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: sltu a0, s2, a5 +; RV32I-NEXT: sltu a2, s1, a0 ; RV32I-NEXT: .LBB7_5: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32I-NEXT: or a1, a5, a4 -; RV32I-NEXT: seqz a1, a1 -; RV32I-NEXT: or a0, a1, a0 -; RV32I-NEXT: mv a2, s2 +; RV32I-NEXT: or a3, a0, a1 +; RV32I-NEXT: seqz a3, a3 +; RV32I-NEXT: or a4, a3, a2 +; RV32I-NEXT: mv a2, s1 ; RV32I-NEXT: mv a3, s0 -; RV32I-NEXT: bnez a0, .LBB7_1 +; RV32I-NEXT: bnez a4, .LBB7_1 ; RV32I-NEXT: # %bb.6: # %atomicrmw.start ; RV32I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; 
RV32I-NEXT: seqz a0, a5 -; RV32I-NEXT: sub a3, a4, a0 -; RV32I-NEXT: addi a2, a5, -1 +; RV32I-NEXT: seqz a2, a0 +; RV32I-NEXT: sub a3, a1, a2 +; RV32I-NEXT: addi a2, a0, -1 ; RV32I-NEXT: j .LBB7_1 ; RV32I-NEXT: .LBB7_7: # %atomicrmw.end -; RV32I-NEXT: mv a0, a5 -; RV32I-NEXT: mv a1, a4 ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1350,49 +1347,48 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV32IA-NEXT: .cfi_offset s1, -12 ; RV32IA-NEXT: .cfi_offset s2, -16 ; RV32IA-NEXT: mv s0, a2 -; RV32IA-NEXT: mv s1, a0 -; RV32IA-NEXT: lw a5, 0(a0) -; RV32IA-NEXT: lw a4, 4(a0) -; RV32IA-NEXT: mv s2, a1 +; RV32IA-NEXT: mv s1, a1 +; RV32IA-NEXT: mv s2, a0 +; RV32IA-NEXT: lw a0, 0(a0) +; RV32IA-NEXT: lw a1, 4(s2) ; RV32IA-NEXT: j .LBB7_2 ; RV32IA-NEXT: .LBB7_1: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: sw a5, 8(sp) -; RV32IA-NEXT: sw a4, 12(sp) +; RV32IA-NEXT: sw a0, 8(sp) +; RV32IA-NEXT: sw a1, 12(sp) ; RV32IA-NEXT: addi a1, sp, 8 ; RV32IA-NEXT: li a4, 5 ; RV32IA-NEXT: li a5, 5 -; RV32IA-NEXT: mv a0, s1 +; RV32IA-NEXT: mv a0, s2 ; RV32IA-NEXT: call __atomic_compare_exchange_8 -; RV32IA-NEXT: lw a5, 8(sp) -; RV32IA-NEXT: lw a4, 12(sp) -; RV32IA-NEXT: bnez a0, .LBB7_7 +; RV32IA-NEXT: mv a2, a0 +; RV32IA-NEXT: lw a0, 8(sp) +; RV32IA-NEXT: lw a1, 12(sp) +; RV32IA-NEXT: bnez a2, .LBB7_7 ; RV32IA-NEXT: .LBB7_2: # %atomicrmw.start ; RV32IA-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32IA-NEXT: beq a4, s0, .LBB7_4 +; RV32IA-NEXT: beq a1, s0, .LBB7_4 ; RV32IA-NEXT: # %bb.3: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: sltu a0, s0, a4 +; RV32IA-NEXT: sltu a2, s0, a1 ; RV32IA-NEXT: j .LBB7_5 ; RV32IA-NEXT: .LBB7_4: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: sltu a0, s2, a5 +; RV32IA-NEXT: sltu a2, s1, a0 ; RV32IA-NEXT: .LBB7_5: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: or a1, a5, a4 -; RV32IA-NEXT: seqz a1, a1 -; RV32IA-NEXT: or a0, a1, a0 -; RV32IA-NEXT: mv a2, s2 +; RV32IA-NEXT: or a3, a0, a1 +; RV32IA-NEXT: seqz a3, a3 +; RV32IA-NEXT: or a4, a3, a2 +; RV32IA-NEXT: mv a2, s1 ; RV32IA-NEXT: mv a3, s0 -; RV32IA-NEXT: bnez a0, .LBB7_1 +; RV32IA-NEXT: bnez a4, .LBB7_1 ; RV32IA-NEXT: # %bb.6: # %atomicrmw.start ; RV32IA-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV32IA-NEXT: seqz a0, a5 -; RV32IA-NEXT: sub a3, a4, a0 -; RV32IA-NEXT: addi a2, a5, -1 +; RV32IA-NEXT: seqz a2, a0 +; RV32IA-NEXT: sub a3, a1, a2 +; RV32IA-NEXT: addi a2, a0, -1 ; RV32IA-NEXT: j .LBB7_1 ; RV32IA-NEXT: .LBB7_7: # %atomicrmw.end -; RV32IA-NEXT: mv a0, a5 -; RV32IA-NEXT: mv a1, a4 ; RV32IA-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32IA-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -1415,33 +1411,33 @@ define i64 @atomicrmw_udec_wrap_i64(ptr %ptr, i64 %val) { ; RV64I-NEXT: .cfi_offset ra, -8 ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, -24 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: ld a3, 0(a0) -; RV64I-NEXT: mv s1, a1 +; RV64I-NEXT: mv s0, a1 +; RV64I-NEXT: mv s1, a0 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: j .LBB7_2 ; RV64I-NEXT: .LBB7_1: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV64I-NEXT: sd a3, 0(sp) +; RV64I-NEXT: sd a0, 0(sp) ; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: li a3, 5 ; RV64I-NEXT: li a4, 5 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __atomic_compare_exchange_8 -; 
RV64I-NEXT: ld a3, 0(sp) -; RV64I-NEXT: bnez a0, .LBB7_4 +; RV64I-NEXT: mv a1, a0 +; RV64I-NEXT: ld a0, 0(sp) +; RV64I-NEXT: bnez a1, .LBB7_4 ; RV64I-NEXT: .LBB7_2: # %atomicrmw.start ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: seqz a0, a3 -; RV64I-NEXT: sltu a1, s1, a3 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: mv a2, s1 -; RV64I-NEXT: bnez a0, .LBB7_1 +; RV64I-NEXT: seqz a1, a0 +; RV64I-NEXT: sltu a2, s0, a0 +; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: mv a2, s0 +; RV64I-NEXT: bnez a1, .LBB7_1 ; RV64I-NEXT: # %bb.3: # %atomicrmw.start ; RV64I-NEXT: # in Loop: Header=BB7_2 Depth=1 -; RV64I-NEXT: addi a2, a3, -1 +; RV64I-NEXT: addi a2, a0, -1 ; RV64I-NEXT: j .LBB7_1 ; RV64I-NEXT: .LBB7_4: # %atomicrmw.end -; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/bf16-promote.ll b/llvm/test/CodeGen/RISCV/bf16-promote.ll index 08c053fab4f67..b3f04975d04c4 100644 --- a/llvm/test/CodeGen/RISCV/bf16-promote.ll +++ b/llvm/test/CodeGen/RISCV/bf16-promote.ll @@ -111,12 +111,12 @@ define void @test_fadd(ptr %p, ptr %q) nounwind { ; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; RV64-NEXT: mv s0, a0 -; RV64-NEXT: lhu a0, 0(a1) -; RV64-NEXT: lhu a1, 0(s0) -; RV64-NEXT: slli a0, a0, 16 +; RV64-NEXT: lhu a0, 0(a0) +; RV64-NEXT: lhu a1, 0(a1) ; RV64-NEXT: slli a1, a1, 16 -; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: fmv.w.x fa4, a1 +; RV64-NEXT: slli a0, a0, 16 +; RV64-NEXT: fmv.w.x fa5, a1 +; RV64-NEXT: fmv.w.x fa4, a0 ; RV64-NEXT: fadd.s fa0, fa4, fa5 ; RV64-NEXT: call __truncsfbf2 ; RV64-NEXT: fmv.x.w a0, fa0 @@ -132,12 +132,12 @@ define void @test_fadd(ptr %p, ptr %q) nounwind { ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lhu a0, 0(a1) -; RV32-NEXT: lhu a1, 0(s0) -; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: lhu a0, 0(a0) +; RV32-NEXT: lhu a1, 0(a1) ; RV32-NEXT: slli a1, a1, 16 -; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: fmv.w.x fa4, a1 +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: fmv.w.x fa5, a1 +; RV32-NEXT: fmv.w.x fa4, a0 ; RV32-NEXT: fadd.s fa0, fa4, fa5 ; RV32-NEXT: call __truncsfbf2 ; RV32-NEXT: fmv.x.w a0, fa0 diff --git a/llvm/test/CodeGen/RISCV/bfloat-convert.ll b/llvm/test/CodeGen/RISCV/bfloat-convert.ll index 82359769c7c22..8621b3e980a04 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-convert.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-convert.ll @@ -51,13 +51,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind { ; CHECK32ZFBFMIN-LABEL: fcvt_si_bf16_sat: ; CHECK32ZFBFMIN: # %bb.0: # %start ; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK32ZFBFMIN-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK32ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK32ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; CHECK32ZFBFMIN-NEXT: lui a0, 815104 -; CHECK32ZFBFMIN-NEXT: fmv.w.x fa3, a0 -; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK32ZFBFMIN-NEXT: neg a0, a1 +; CHECK32ZFBFMIN-NEXT: lui a1, %hi(.LCPI1_0) +; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, a0 +; CHECK32ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK32ZFBFMIN-NEXT: neg a0, a0 +; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK32ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32ZFBFMIN-NEXT: fcvt.w.s a1, fa5, rtz ; CHECK32ZFBFMIN-NEXT: and a0, a0, a1 @@ -70,11 +70,11 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind { ; 
RV32ID-NEXT: fmv.w.x fa5, a1 ; RV32ID-NEXT: lui a1, %hi(.LCPI1_0) ; RV32ID-NEXT: slli a0, a0, 16 -; RV32ID-NEXT: flw fa4, %lo(.LCPI1_0)(a1) -; RV32ID-NEXT: fmv.w.x fa3, a0 -; RV32ID-NEXT: feq.s a0, fa3, fa3 -; RV32ID-NEXT: fmax.s fa5, fa3, fa5 +; RV32ID-NEXT: fmv.w.x fa4, a0 +; RV32ID-NEXT: feq.s a0, fa4, fa4 +; RV32ID-NEXT: fmax.s fa5, fa4, fa5 ; RV32ID-NEXT: neg a0, a0 +; RV32ID-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz ; RV32ID-NEXT: and a0, a0, a1 @@ -83,13 +83,13 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind { ; CHECK64ZFBFMIN-LABEL: fcvt_si_bf16_sat: ; CHECK64ZFBFMIN: # %bb.0: # %start ; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 -; CHECK64ZFBFMIN-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK64ZFBFMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK64ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; CHECK64ZFBFMIN-NEXT: lui a0, 815104 -; CHECK64ZFBFMIN-NEXT: fmv.w.x fa3, a0 -; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK64ZFBFMIN-NEXT: neg a0, a1 +; CHECK64ZFBFMIN-NEXT: lui a1, %hi(.LCPI1_0) +; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, a0 +; CHECK64ZFBFMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64ZFBFMIN-NEXT: neg a0, a0 +; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK64ZFBFMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64ZFBFMIN-NEXT: fcvt.l.s a1, fa5, rtz ; CHECK64ZFBFMIN-NEXT: and a0, a0, a1 @@ -102,11 +102,11 @@ define i16 @fcvt_si_bf16_sat(bfloat %a) nounwind { ; RV64ID-NEXT: fmv.w.x fa5, a1 ; RV64ID-NEXT: lui a1, %hi(.LCPI1_0) ; RV64ID-NEXT: slli a0, a0, 16 -; RV64ID-NEXT: flw fa4, %lo(.LCPI1_0)(a1) -; RV64ID-NEXT: fmv.w.x fa3, a0 -; RV64ID-NEXT: feq.s a0, fa3, fa3 -; RV64ID-NEXT: fmax.s fa5, fa3, fa5 +; RV64ID-NEXT: fmv.w.x fa4, a0 +; RV64ID-NEXT: feq.s a0, fa4, fa4 +; RV64ID-NEXT: fmax.s fa5, fa4, fa5 ; RV64ID-NEXT: neg a0, a0 +; RV64ID-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-NEXT: and a0, a0, a1 @@ -152,49 +152,49 @@ define i16 @fcvt_ui_bf16(bfloat %a) nounwind { define i16 @fcvt_ui_bf16_sat(bfloat %a) nounwind { ; CHECK32ZFBFMIN-LABEL: fcvt_ui_bf16_sat: ; CHECK32ZFBFMIN: # %bb.0: # %start +; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, zero ; CHECK32ZFBFMIN-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK32ZFBFMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK32ZFBFMIN-NEXT: fmv.w.x fa3, zero -; CHECK32ZFBFMIN-NEXT: fmax.s fa4, fa4, fa3 -; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa4, fa5 +; CHECK32ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK32ZFBFMIN-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; CHECK32ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32ZFBFMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; CHECK32ZFBFMIN-NEXT: ret ; ; RV32ID-LABEL: fcvt_ui_bf16_sat: ; RV32ID: # %bb.0: # %start -; RV32ID-NEXT: lui a0, %hi(.LCPI3_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0) ; RV32ID-NEXT: fmv.x.w a0, fa0 +; RV32ID-NEXT: fmv.w.x fa5, zero ; RV32ID-NEXT: slli a0, a0, 16 ; RV32ID-NEXT: fmv.w.x fa4, a0 -; RV32ID-NEXT: fmv.w.x fa3, zero -; RV32ID-NEXT: fmax.s fa4, fa4, fa3 -; RV32ID-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-NEXT: lui a0, %hi(.LCPI3_0) +; RV32ID-NEXT: fmax.s fa5, fa4, fa5 +; RV32ID-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32ID-NEXT: ret ; ; CHECK64ZFBFMIN-LABEL: fcvt_ui_bf16_sat: ; CHECK64ZFBFMIN: # %bb.0: # %start +; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK64ZFBFMIN-NEXT: fmv.w.x fa4, zero ; CHECK64ZFBFMIN-NEXT: lui a0, 
%hi(.LCPI3_0) -; CHECK64ZFBFMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; CHECK64ZFBFMIN-NEXT: fcvt.s.bf16 fa4, fa0 -; CHECK64ZFBFMIN-NEXT: fmv.w.x fa3, zero -; CHECK64ZFBFMIN-NEXT: fmax.s fa4, fa4, fa3 -; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa4, fa5 +; CHECK64ZFBFMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK64ZFBFMIN-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; CHECK64ZFBFMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64ZFBFMIN-NEXT: fcvt.lu.s a0, fa5, rtz ; CHECK64ZFBFMIN-NEXT: ret ; ; RV64ID-LABEL: fcvt_ui_bf16_sat: ; RV64ID: # %bb.0: # %start -; RV64ID-NEXT: lui a0, %hi(.LCPI3_0) -; RV64ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0) ; RV64ID-NEXT: fmv.x.w a0, fa0 +; RV64ID-NEXT: fmv.w.x fa5, zero ; RV64ID-NEXT: slli a0, a0, 16 ; RV64ID-NEXT: fmv.w.x fa4, a0 -; RV64ID-NEXT: fmv.w.x fa3, zero -; RV64ID-NEXT: fmax.s fa4, fa4, fa3 -; RV64ID-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-NEXT: lui a0, %hi(.LCPI3_0) +; RV64ID-NEXT: fmax.s fa5, fa4, fa5 +; RV64ID-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64ID-NEXT: ret start: @@ -647,14 +647,14 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { ; CHECK32ZFBFMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; CHECK32ZFBFMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; CHECK32ZFBFMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa0, fa0 ; CHECK32ZFBFMIN-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK32ZFBFMIN-NEXT: fmv.w.x fa5, zero +; CHECK32ZFBFMIN-NEXT: fle.s a1, fa5, fa0 ; CHECK32ZFBFMIN-NEXT: flw fa5, %lo(.LCPI12_0)(a0) -; CHECK32ZFBFMIN-NEXT: fcvt.s.bf16 fa0, fa0 -; CHECK32ZFBFMIN-NEXT: fmv.w.x fa4, zero -; CHECK32ZFBFMIN-NEXT: fle.s a0, fa4, fa0 -; CHECK32ZFBFMIN-NEXT: flt.s a1, fa5, fa0 -; CHECK32ZFBFMIN-NEXT: neg s0, a1 -; CHECK32ZFBFMIN-NEXT: neg s1, a0 +; CHECK32ZFBFMIN-NEXT: flt.s a0, fa5, fa0 +; CHECK32ZFBFMIN-NEXT: neg s0, a0 +; CHECK32ZFBFMIN-NEXT: neg s1, a1 ; CHECK32ZFBFMIN-NEXT: call __fixunssfdi ; CHECK32ZFBFMIN-NEXT: and a0, s1, a0 ; CHECK32ZFBFMIN-NEXT: and a1, s1, a1 @@ -675,11 +675,11 @@ define i64 @fcvt_lu_bf16_sat(bfloat %a) nounwind { ; RV32ID-NEXT: fmv.x.w a0, fa0 ; RV32ID-NEXT: lui a1, %hi(.LCPI12_0) ; RV32ID-NEXT: fmv.w.x fa5, zero -; RV32ID-NEXT: flw fa4, %lo(.LCPI12_0)(a1) ; RV32ID-NEXT: slli a0, a0, 16 ; RV32ID-NEXT: fmv.w.x fa0, a0 ; RV32ID-NEXT: fle.s a0, fa5, fa0 -; RV32ID-NEXT: flt.s a1, fa4, fa0 +; RV32ID-NEXT: flw fa5, %lo(.LCPI12_0)(a1) +; RV32ID-NEXT: flt.s a1, fa5, fa0 ; RV32ID-NEXT: neg s0, a1 ; RV32ID-NEXT: neg s1, a0 ; RV32ID-NEXT: call __fixunssfdi diff --git a/llvm/test/CodeGen/RISCV/bfloat-mem.ll b/llvm/test/CodeGen/RISCV/bfloat-mem.ll index f9cf4e523b77d..504a698615841 100644 --- a/llvm/test/CodeGen/RISCV/bfloat-mem.ll +++ b/llvm/test/CodeGen/RISCV/bfloat-mem.ll @@ -7,11 +7,11 @@ define bfloat @flh(ptr %a) nounwind { ; CHECK-LABEL: flh: ; CHECK: # %bb.0: -; CHECK-NEXT: flh fa5, 6(a0) -; CHECK-NEXT: flh fa4, 0(a0) -; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: flh fa5, 0(a0) +; CHECK-NEXT: flh fa4, 6(a0) ; CHECK-NEXT: fcvt.s.bf16 fa4, fa4 -; CHECK-NEXT: fadd.s fa5, fa4, fa5 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa5 +; CHECK-NEXT: fadd.s fa5, fa5, fa4 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret %1 = load bfloat, ptr %a diff --git a/llvm/test/CodeGen/RISCV/bfloat.ll b/llvm/test/CodeGen/RISCV/bfloat.ll index c83b0ed6b0eee..1b93fdbbb68c2 100644 --- a/llvm/test/CodeGen/RISCV/bfloat.ll +++ b/llvm/test/CodeGen/RISCV/bfloat.ll @@ -447,12 +447,12 @@ define bfloat @bfloat_load(ptr %a) nounwind { ; RV32ID-ILP32: # %bb.0: ; RV32ID-ILP32-NEXT: addi sp, sp, -16 ; 
RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: lhu a1, 6(a0) -; RV32ID-ILP32-NEXT: lhu a0, 0(a0) -; RV32ID-ILP32-NEXT: slli a1, a1, 16 +; RV32ID-ILP32-NEXT: lhu a1, 0(a0) +; RV32ID-ILP32-NEXT: lhu a0, 6(a0) ; RV32ID-ILP32-NEXT: slli a0, a0, 16 -; RV32ID-ILP32-NEXT: fmv.w.x fa5, a1 -; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 +; RV32ID-ILP32-NEXT: slli a1, a1, 16 +; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, a1 ; RV32ID-ILP32-NEXT: fadd.s fa5, fa4, fa5 ; RV32ID-ILP32-NEXT: fmv.x.w a0, fa5 ; RV32ID-ILP32-NEXT: call __truncsfbf2 @@ -466,12 +466,12 @@ define bfloat @bfloat_load(ptr %a) nounwind { ; RV64ID-LP64: # %bb.0: ; RV64ID-LP64-NEXT: addi sp, sp, -16 ; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64-NEXT: lhu a1, 6(a0) -; RV64ID-LP64-NEXT: lhu a0, 0(a0) -; RV64ID-LP64-NEXT: slli a1, a1, 16 +; RV64ID-LP64-NEXT: lhu a1, 0(a0) +; RV64ID-LP64-NEXT: lhu a0, 6(a0) ; RV64ID-LP64-NEXT: slli a0, a0, 16 -; RV64ID-LP64-NEXT: fmv.w.x fa5, a1 -; RV64ID-LP64-NEXT: fmv.w.x fa4, a0 +; RV64ID-LP64-NEXT: slli a1, a1, 16 +; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 +; RV64ID-LP64-NEXT: fmv.w.x fa4, a1 ; RV64ID-LP64-NEXT: fadd.s fa5, fa4, fa5 ; RV64ID-LP64-NEXT: fmv.x.w a0, fa5 ; RV64ID-LP64-NEXT: call __truncsfbf2 @@ -485,12 +485,12 @@ define bfloat @bfloat_load(ptr %a) nounwind { ; RV32ID-ILP32D: # %bb.0: ; RV32ID-ILP32D-NEXT: addi sp, sp, -16 ; RV32ID-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32ID-ILP32D-NEXT: lhu a1, 6(a0) -; RV32ID-ILP32D-NEXT: lhu a0, 0(a0) -; RV32ID-ILP32D-NEXT: slli a1, a1, 16 +; RV32ID-ILP32D-NEXT: lhu a1, 0(a0) +; RV32ID-ILP32D-NEXT: lhu a0, 6(a0) ; RV32ID-ILP32D-NEXT: slli a0, a0, 16 -; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a1 -; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a0 +; RV32ID-ILP32D-NEXT: slli a1, a1, 16 +; RV32ID-ILP32D-NEXT: fmv.w.x fa5, a0 +; RV32ID-ILP32D-NEXT: fmv.w.x fa4, a1 ; RV32ID-ILP32D-NEXT: fadd.s fa0, fa4, fa5 ; RV32ID-ILP32D-NEXT: call __truncsfbf2 ; RV32ID-ILP32D-NEXT: fmv.x.w a0, fa0 @@ -505,12 +505,12 @@ define bfloat @bfloat_load(ptr %a) nounwind { ; RV64ID-LP64D: # %bb.0: ; RV64ID-LP64D-NEXT: addi sp, sp, -16 ; RV64ID-LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill -; RV64ID-LP64D-NEXT: lhu a1, 6(a0) -; RV64ID-LP64D-NEXT: lhu a0, 0(a0) -; RV64ID-LP64D-NEXT: slli a1, a1, 16 +; RV64ID-LP64D-NEXT: lhu a1, 0(a0) +; RV64ID-LP64D-NEXT: lhu a0, 6(a0) ; RV64ID-LP64D-NEXT: slli a0, a0, 16 -; RV64ID-LP64D-NEXT: fmv.w.x fa5, a1 -; RV64ID-LP64D-NEXT: fmv.w.x fa4, a0 +; RV64ID-LP64D-NEXT: slli a1, a1, 16 +; RV64ID-LP64D-NEXT: fmv.w.x fa5, a0 +; RV64ID-LP64D-NEXT: fmv.w.x fa4, a1 ; RV64ID-LP64D-NEXT: fadd.s fa0, fa4, fa5 ; RV64ID-LP64D-NEXT: call __truncsfbf2 ; RV64ID-LP64D-NEXT: fmv.x.w a0, fa0 diff --git a/llvm/test/CodeGen/RISCV/bittest.ll b/llvm/test/CodeGen/RISCV/bittest.ll index d69ab0550a034..0564764c3f0bc 100644 --- a/llvm/test/CodeGen/RISCV/bittest.ll +++ b/llvm/test/CodeGen/RISCV/bittest.ll @@ -552,12 +552,12 @@ declare void @bar() define signext i32 @bit_10_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; CHECK-LABEL: bit_10_z_select_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a3, a0, 1024 -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: beqz a3, .LBB15_2 +; CHECK-NEXT: andi a0, a0, 1024 +; CHECK-NEXT: beqz a0, .LBB15_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB15_2: +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: ret %1 = and i32 %a, 1024 %2 = icmp eq i32 %1, 0 @@ -568,22 +568,22 @@ define signext i32 @bit_10_z_select_i32(i32 signext %a, i32 signext 
%b, i32 sign define signext i32 @bit_10_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; RV32-LABEL: bit_10_nz_select_i32: ; RV32: # %bb.0: -; RV32-NEXT: slli a3, a0, 21 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bltz a3, .LBB16_2 +; RV32-NEXT: slli a0, a0, 21 +; RV32-NEXT: bltz a0, .LBB16_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB16_2: +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_10_nz_select_i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 53 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bltz a3, .LBB16_2 +; RV64-NEXT: slli a0, a0, 53 +; RV64-NEXT: bltz a0, .LBB16_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB16_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 1024 %2 = icmp ne i32 %1, 0 @@ -594,22 +594,22 @@ define signext i32 @bit_10_nz_select_i32(i32 signext %a, i32 signext %b, i32 sig define signext i32 @bit_11_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; RV32-LABEL: bit_11_z_select_i32: ; RV32: # %bb.0: -; RV32-NEXT: slli a3, a0, 20 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bgez a3, .LBB17_2 +; RV32-NEXT: slli a0, a0, 20 +; RV32-NEXT: bgez a0, .LBB17_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB17_2: +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_11_z_select_i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 52 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bgez a3, .LBB17_2 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: bgez a0, .LBB17_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB17_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 2048 %2 = icmp eq i32 %1, 0 @@ -620,22 +620,22 @@ define signext i32 @bit_11_z_select_i32(i32 signext %a, i32 signext %b, i32 sign define signext i32 @bit_11_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; RV32-LABEL: bit_11_nz_select_i32: ; RV32: # %bb.0: -; RV32-NEXT: slli a3, a0, 20 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bltz a3, .LBB18_2 +; RV32-NEXT: slli a0, a0, 20 +; RV32-NEXT: bltz a0, .LBB18_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB18_2: +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_11_nz_select_i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 52 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bltz a3, .LBB18_2 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: bltz a0, .LBB18_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB18_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 2048 %2 = icmp ne i32 %1, 0 @@ -646,22 +646,22 @@ define signext i32 @bit_11_nz_select_i32(i32 signext %a, i32 signext %b, i32 sig define signext i32 @bit_20_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; RV32-LABEL: bit_20_z_select_i32: ; RV32: # %bb.0: -; RV32-NEXT: slli a3, a0, 11 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bgez a3, .LBB19_2 +; RV32-NEXT: slli a0, a0, 11 +; RV32-NEXT: bgez a0, .LBB19_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB19_2: +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_20_z_select_i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 43 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bgez a3, .LBB19_2 +; RV64-NEXT: slli a0, a0, 43 +; RV64-NEXT: bgez a0, .LBB19_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB19_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 1048576 %2 = icmp eq i32 %1, 0 @@ 
-672,22 +672,22 @@ define signext i32 @bit_20_z_select_i32(i32 signext %a, i32 signext %b, i32 sign define signext i32 @bit_20_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; RV32-LABEL: bit_20_nz_select_i32: ; RV32: # %bb.0: -; RV32-NEXT: slli a3, a0, 11 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bltz a3, .LBB20_2 +; RV32-NEXT: slli a0, a0, 11 +; RV32-NEXT: bltz a0, .LBB20_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB20_2: +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_20_nz_select_i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 43 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bltz a3, .LBB20_2 +; RV64-NEXT: slli a0, a0, 43 +; RV64-NEXT: bltz a0, .LBB20_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB20_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 1048576 %2 = icmp ne i32 %1, 0 @@ -708,12 +708,12 @@ define signext i32 @bit_31_z_select_i32(i32 signext %a, i32 signext %b, i32 sign ; RV64-LABEL: bit_31_z_select_i32: ; RV64: # %bb.0: ; RV64-NEXT: lui a3, 524288 -; RV64-NEXT: and a3, a0, a3 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB21_2 +; RV64-NEXT: and a0, a0, a3 +; RV64-NEXT: beqz a0, .LBB21_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB21_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 2147483648 %2 = icmp eq i32 %1, 0 @@ -724,23 +724,23 @@ define signext i32 @bit_31_z_select_i32(i32 signext %a, i32 signext %b, i32 sign define signext i32 @bit_31_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; RV32-LABEL: bit_31_nz_select_i32: ; RV32: # %bb.0: -; RV32-NEXT: srli a3, a0, 31 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bnez a3, .LBB22_2 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: bnez a0, .LBB22_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB22_2: +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_31_nz_select_i32: ; RV64: # %bb.0: ; RV64-NEXT: lui a3, 524288 -; RV64-NEXT: and a3, a0, a3 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB22_2 +; RV64-NEXT: and a0, a0, a3 +; RV64-NEXT: bnez a0, .LBB22_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB22_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 2147483648 %2 = icmp ne i32 %1, 0 @@ -752,23 +752,23 @@ define i64 @bit_10_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_10_z_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: andi a3, a0, 1024 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: beqz a3, .LBB23_2 +; RV32-NEXT: andi a0, a0, 1024 +; RV32-NEXT: beqz a0, .LBB23_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB23_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_10_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: andi a3, a0, 1024 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB23_2 +; RV64-NEXT: andi a0, a0, 1024 +; RV64-NEXT: beqz a0, .LBB23_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB23_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 1024 %2 = icmp eq i64 %1, 0 @@ -781,47 +781,47 @@ define i64 @bit_10_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32I: # %bb.0: ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: slli a0, a0, 21 -; RV32I-NEXT: srli a3, a0, 31 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: bnez a3, .LBB24_2 +; RV32I-NEXT: srli a0, a0, 31 +; RV32I-NEXT: bnez a0, .LBB24_2 ; RV32I-NEXT: # %bb.1: -; 
RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: .LBB24_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; ; RV64-LABEL: bit_10_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 53 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bltz a3, .LBB24_2 +; RV64-NEXT: slli a0, a0, 53 +; RV64-NEXT: bltz a0, .LBB24_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB24_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret ; ; RV32ZBS-LABEL: bit_10_nz_select_i64: ; RV32ZBS: # %bb.0: ; RV32ZBS-NEXT: mv a1, a3 -; RV32ZBS-NEXT: bexti a3, a0, 10 -; RV32ZBS-NEXT: mv a0, a2 -; RV32ZBS-NEXT: bnez a3, .LBB24_2 +; RV32ZBS-NEXT: bexti a0, a0, 10 +; RV32ZBS-NEXT: bnez a0, .LBB24_2 ; RV32ZBS-NEXT: # %bb.1: -; RV32ZBS-NEXT: mv a0, a4 +; RV32ZBS-NEXT: mv a2, a4 ; RV32ZBS-NEXT: mv a1, a5 ; RV32ZBS-NEXT: .LBB24_2: +; RV32ZBS-NEXT: mv a0, a2 ; RV32ZBS-NEXT: ret ; ; RV32XTHEADBS-LABEL: bit_10_nz_select_i64: ; RV32XTHEADBS: # %bb.0: ; RV32XTHEADBS-NEXT: mv a1, a3 -; RV32XTHEADBS-NEXT: th.tst a3, a0, 10 -; RV32XTHEADBS-NEXT: mv a0, a2 -; RV32XTHEADBS-NEXT: bnez a3, .LBB24_2 +; RV32XTHEADBS-NEXT: th.tst a0, a0, 10 +; RV32XTHEADBS-NEXT: bnez a0, .LBB24_2 ; RV32XTHEADBS-NEXT: # %bb.1: -; RV32XTHEADBS-NEXT: mv a0, a4 +; RV32XTHEADBS-NEXT: mv a2, a4 ; RV32XTHEADBS-NEXT: mv a1, a5 ; RV32XTHEADBS-NEXT: .LBB24_2: +; RV32XTHEADBS-NEXT: mv a0, a2 ; RV32XTHEADBS-NEXT: ret %1 = and i64 %a, 1024 %2 = icmp ne i64 %1, 0 @@ -833,23 +833,23 @@ define i64 @bit_11_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_11_z_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: slli a3, a0, 20 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bgez a3, .LBB25_2 +; RV32-NEXT: slli a0, a0, 20 +; RV32-NEXT: bgez a0, .LBB25_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB25_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_11_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 52 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bgez a3, .LBB25_2 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: bgez a0, .LBB25_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB25_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 2048 %2 = icmp eq i64 %1, 0 @@ -862,47 +862,47 @@ define i64 @bit_11_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32I: # %bb.0: ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: slli a0, a0, 20 -; RV32I-NEXT: srli a3, a0, 31 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: bnez a3, .LBB26_2 +; RV32I-NEXT: srli a0, a0, 31 +; RV32I-NEXT: bnez a0, .LBB26_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: .LBB26_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; ; RV64-LABEL: bit_11_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 52 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bltz a3, .LBB26_2 +; RV64-NEXT: slli a0, a0, 52 +; RV64-NEXT: bltz a0, .LBB26_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB26_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret ; ; RV32ZBS-LABEL: bit_11_nz_select_i64: ; RV32ZBS: # %bb.0: ; RV32ZBS-NEXT: mv a1, a3 -; RV32ZBS-NEXT: bexti a3, a0, 11 -; RV32ZBS-NEXT: mv a0, a2 -; RV32ZBS-NEXT: bnez a3, .LBB26_2 +; RV32ZBS-NEXT: bexti a0, a0, 11 +; RV32ZBS-NEXT: bnez a0, .LBB26_2 ; RV32ZBS-NEXT: # %bb.1: -; RV32ZBS-NEXT: mv a0, a4 +; RV32ZBS-NEXT: mv a2, a4 ; RV32ZBS-NEXT: mv a1, a5 ; RV32ZBS-NEXT: .LBB26_2: +; RV32ZBS-NEXT: mv a0, a2 ; RV32ZBS-NEXT: ret ; ; 
RV32XTHEADBS-LABEL: bit_11_nz_select_i64: ; RV32XTHEADBS: # %bb.0: ; RV32XTHEADBS-NEXT: mv a1, a3 -; RV32XTHEADBS-NEXT: th.tst a3, a0, 11 -; RV32XTHEADBS-NEXT: mv a0, a2 -; RV32XTHEADBS-NEXT: bnez a3, .LBB26_2 +; RV32XTHEADBS-NEXT: th.tst a0, a0, 11 +; RV32XTHEADBS-NEXT: bnez a0, .LBB26_2 ; RV32XTHEADBS-NEXT: # %bb.1: -; RV32XTHEADBS-NEXT: mv a0, a4 +; RV32XTHEADBS-NEXT: mv a2, a4 ; RV32XTHEADBS-NEXT: mv a1, a5 ; RV32XTHEADBS-NEXT: .LBB26_2: +; RV32XTHEADBS-NEXT: mv a0, a2 ; RV32XTHEADBS-NEXT: ret %1 = and i64 %a, 2048 %2 = icmp ne i64 %1, 0 @@ -914,23 +914,23 @@ define i64 @bit_20_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_20_z_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: slli a3, a0, 11 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bgez a3, .LBB27_2 +; RV32-NEXT: slli a0, a0, 11 +; RV32-NEXT: bgez a0, .LBB27_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB27_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_20_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 43 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bgez a3, .LBB27_2 +; RV64-NEXT: slli a0, a0, 43 +; RV64-NEXT: bgez a0, .LBB27_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB27_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 1048576 %2 = icmp eq i64 %1, 0 @@ -943,47 +943,47 @@ define i64 @bit_20_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32I: # %bb.0: ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: slli a0, a0, 11 -; RV32I-NEXT: srli a3, a0, 31 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: bnez a3, .LBB28_2 +; RV32I-NEXT: srli a0, a0, 31 +; RV32I-NEXT: bnez a0, .LBB28_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: .LBB28_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; ; RV64-LABEL: bit_20_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 43 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bltz a3, .LBB28_2 +; RV64-NEXT: slli a0, a0, 43 +; RV64-NEXT: bltz a0, .LBB28_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB28_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret ; ; RV32ZBS-LABEL: bit_20_nz_select_i64: ; RV32ZBS: # %bb.0: ; RV32ZBS-NEXT: mv a1, a3 -; RV32ZBS-NEXT: bexti a3, a0, 20 -; RV32ZBS-NEXT: mv a0, a2 -; RV32ZBS-NEXT: bnez a3, .LBB28_2 +; RV32ZBS-NEXT: bexti a0, a0, 20 +; RV32ZBS-NEXT: bnez a0, .LBB28_2 ; RV32ZBS-NEXT: # %bb.1: -; RV32ZBS-NEXT: mv a0, a4 +; RV32ZBS-NEXT: mv a2, a4 ; RV32ZBS-NEXT: mv a1, a5 ; RV32ZBS-NEXT: .LBB28_2: +; RV32ZBS-NEXT: mv a0, a2 ; RV32ZBS-NEXT: ret ; ; RV32XTHEADBS-LABEL: bit_20_nz_select_i64: ; RV32XTHEADBS: # %bb.0: ; RV32XTHEADBS-NEXT: mv a1, a3 -; RV32XTHEADBS-NEXT: th.tst a3, a0, 20 -; RV32XTHEADBS-NEXT: mv a0, a2 -; RV32XTHEADBS-NEXT: bnez a3, .LBB28_2 +; RV32XTHEADBS-NEXT: th.tst a0, a0, 20 +; RV32XTHEADBS-NEXT: bnez a0, .LBB28_2 ; RV32XTHEADBS-NEXT: # %bb.1: -; RV32XTHEADBS-NEXT: mv a0, a4 +; RV32XTHEADBS-NEXT: mv a2, a4 ; RV32XTHEADBS-NEXT: mv a1, a5 ; RV32XTHEADBS-NEXT: .LBB28_2: +; RV32XTHEADBS-NEXT: mv a0, a2 ; RV32XTHEADBS-NEXT: ret %1 = and i64 %a, 1048576 %2 = icmp ne i64 %1, 0 @@ -1005,12 +1005,12 @@ define i64 @bit_31_z_select_i64(i64 %a, i64 %b, i64 %c) { ; ; RV64-LABEL: bit_31_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 32 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bgez a3, .LBB29_2 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: bgez a0, .LBB29_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: 
.LBB29_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 2147483648 %2 = icmp eq i64 %1, 0 @@ -1022,23 +1022,23 @@ define i64 @bit_31_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_31_nz_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: srli a3, a0, 31 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bnez a3, .LBB30_2 +; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: bnez a0, .LBB30_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB30_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_31_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 32 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bltz a3, .LBB30_2 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: bltz a0, .LBB30_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB30_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 2147483648 %2 = icmp ne i64 %1, 0 @@ -1049,8 +1049,8 @@ define i64 @bit_31_nz_select_i64(i64 %a, i64 %b, i64 %c) { define i64 @bit_32_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_32_z_select_i64: ; RV32: # %bb.0: -; RV32-NEXT: andi a1, a1, 1 ; RV32-NEXT: mv a0, a2 +; RV32-NEXT: andi a1, a1, 1 ; RV32-NEXT: beqz a1, .LBB31_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a0, a4 @@ -1061,12 +1061,12 @@ define i64 @bit_32_z_select_i64(i64 %a, i64 %b, i64 %c) { ; ; RV64-LABEL: bit_32_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 31 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bgez a3, .LBB31_2 +; RV64-NEXT: slli a0, a0, 31 +; RV64-NEXT: bgez a0, .LBB31_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB31_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 4294967296 %2 = icmp eq i64 %1, 0 @@ -1077,8 +1077,8 @@ define i64 @bit_32_z_select_i64(i64 %a, i64 %b, i64 %c) { define i64 @bit_32_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_32_nz_select_i64: ; RV32: # %bb.0: -; RV32-NEXT: andi a1, a1, 1 ; RV32-NEXT: mv a0, a2 +; RV32-NEXT: andi a1, a1, 1 ; RV32-NEXT: bnez a1, .LBB32_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a0, a4 @@ -1089,12 +1089,12 @@ define i64 @bit_32_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; ; RV64-LABEL: bit_32_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 31 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bltz a3, .LBB32_2 +; RV64-NEXT: slli a0, a0, 31 +; RV64-NEXT: bltz a0, .LBB32_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB32_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 4294967296 %2 = icmp ne i64 %1, 0 @@ -1105,8 +1105,8 @@ define i64 @bit_32_nz_select_i64(i64 %a, i64 %b, i64 %c) { define i64 @bit_55_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_55_z_select_i64: ; RV32: # %bb.0: -; RV32-NEXT: slli a1, a1, 8 ; RV32-NEXT: mv a0, a2 +; RV32-NEXT: slli a1, a1, 8 ; RV32-NEXT: bgez a1, .LBB33_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a0, a4 @@ -1117,12 +1117,12 @@ define i64 @bit_55_z_select_i64(i64 %a, i64 %b, i64 %c) { ; ; RV64-LABEL: bit_55_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 8 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bgez a3, .LBB33_2 +; RV64-NEXT: slli a0, a0, 8 +; RV64-NEXT: bgez a0, .LBB33_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB33_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 36028797018963968 %2 = icmp eq i64 %1, 0 @@ -1133,9 +1133,9 @@ define i64 @bit_55_z_select_i64(i64 %a, i64 %b, i64 %c) { define i64 @bit_55_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; 
RV32I-LABEL: bit_55_nz_select_i64: ; RV32I: # %bb.0: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: slli a1, a1, 8 ; RV32I-NEXT: srli a1, a1, 31 -; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: bnez a1, .LBB34_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a0, a4 @@ -1146,18 +1146,18 @@ define i64 @bit_55_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; ; RV64-LABEL: bit_55_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 8 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bltz a3, .LBB34_2 +; RV64-NEXT: slli a0, a0, 8 +; RV64-NEXT: bltz a0, .LBB34_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB34_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret ; ; RV32ZBS-LABEL: bit_55_nz_select_i64: ; RV32ZBS: # %bb.0: -; RV32ZBS-NEXT: bexti a1, a1, 23 ; RV32ZBS-NEXT: mv a0, a2 +; RV32ZBS-NEXT: bexti a1, a1, 23 ; RV32ZBS-NEXT: bnez a1, .LBB34_2 ; RV32ZBS-NEXT: # %bb.1: ; RV32ZBS-NEXT: mv a0, a4 @@ -1168,8 +1168,8 @@ define i64 @bit_55_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; ; RV32XTHEADBS-LABEL: bit_55_nz_select_i64: ; RV32XTHEADBS: # %bb.0: -; RV32XTHEADBS-NEXT: th.tst a1, a1, 23 ; RV32XTHEADBS-NEXT: mv a0, a2 +; RV32XTHEADBS-NEXT: th.tst a1, a1, 23 ; RV32XTHEADBS-NEXT: bnez a1, .LBB34_2 ; RV32XTHEADBS-NEXT: # %bb.1: ; RV32XTHEADBS-NEXT: mv a0, a4 @@ -1212,8 +1212,8 @@ define i64 @bit_63_z_select_i64(i64 %a, i64 %b, i64 %c) { define i64 @bit_63_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_63_nz_select_i64: ; RV32: # %bb.0: -; RV32-NEXT: srli a1, a1, 31 ; RV32-NEXT: mv a0, a2 +; RV32-NEXT: srli a1, a1, 31 ; RV32-NEXT: bnez a1, .LBB36_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: mv a0, a4 @@ -1224,12 +1224,12 @@ define i64 @bit_63_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; ; RV64-LABEL: bit_63_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: srli a3, a0, 63 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB36_2 +; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: bnez a0, .LBB36_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB36_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 9223372036854775808 %2 = icmp ne i64 %1, 0 @@ -1858,12 +1858,12 @@ define void @bit_63_nz_branch_i64(i64 %0) { define signext i32 @bit_10_1_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; CHECK-LABEL: bit_10_1_z_select_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a3, a0, 1023 -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: beqz a3, .LBB59_2 +; CHECK-NEXT: andi a0, a0, 1023 +; CHECK-NEXT: beqz a0, .LBB59_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB59_2: +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: ret %1 = and i32 %a, 1023 %2 = icmp eq i32 %1, 0 @@ -1874,12 +1874,12 @@ define signext i32 @bit_10_1_z_select_i32(i32 signext %a, i32 signext %b, i32 si define signext i32 @bit_10_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; CHECK-LABEL: bit_10_1_nz_select_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a3, a0, 1023 -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: bnez a3, .LBB60_2 +; CHECK-NEXT: andi a0, a0, 1023 +; CHECK-NEXT: bnez a0, .LBB60_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB60_2: +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: ret %1 = and i32 %a, 1023 %2 = icmp ne i32 %1, 0 @@ -1890,12 +1890,12 @@ define signext i32 @bit_10_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 s define signext i32 @bit_11_1_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; CHECK-LABEL: bit_11_1_z_select_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a3, a0, 2047 -; CHECK-NEXT: mv 
a0, a1 -; CHECK-NEXT: beqz a3, .LBB61_2 +; CHECK-NEXT: andi a0, a0, 2047 +; CHECK-NEXT: beqz a0, .LBB61_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB61_2: +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: ret %1 = and i32 %a, 2047 %2 = icmp eq i32 %1, 0 @@ -1906,12 +1906,12 @@ define signext i32 @bit_11_1_z_select_i32(i32 signext %a, i32 signext %b, i32 si define signext i32 @bit_11_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; CHECK-LABEL: bit_11_1_nz_select_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a3, a0, 2047 -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: bnez a3, .LBB62_2 +; CHECK-NEXT: andi a0, a0, 2047 +; CHECK-NEXT: bnez a0, .LBB62_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a0, a2 +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB62_2: +; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: ret %1 = and i32 %a, 2047 %2 = icmp ne i32 %1, 0 @@ -1922,22 +1922,22 @@ define signext i32 @bit_11_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 s define signext i32 @bit_16_1_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; RV32-LABEL: bit_16_1_z_select_i32: ; RV32: # %bb.0: -; RV32-NEXT: slli a3, a0, 16 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: beqz a3, .LBB63_2 +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: beqz a0, .LBB63_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB63_2: +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_16_1_z_select_i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 48 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB63_2 +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: beqz a0, .LBB63_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB63_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 65535 %2 = icmp eq i32 %1, 0 @@ -1948,22 +1948,22 @@ define signext i32 @bit_16_1_z_select_i32(i32 signext %a, i32 signext %b, i32 si define signext i32 @bit_16_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; RV32-LABEL: bit_16_1_nz_select_i32: ; RV32: # %bb.0: -; RV32-NEXT: slli a3, a0, 16 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bnez a3, .LBB64_2 +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: bnez a0, .LBB64_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB64_2: +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_16_1_nz_select_i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 48 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB64_2 +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: bnez a0, .LBB64_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB64_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 65535 %2 = icmp ne i32 %1, 0 @@ -1974,22 +1974,22 @@ define signext i32 @bit_16_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 s define signext i32 @bit_20_1_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; RV32-LABEL: bit_20_1_z_select_i32: ; RV32: # %bb.0: -; RV32-NEXT: slli a3, a0, 12 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: beqz a3, .LBB65_2 +; RV32-NEXT: slli a0, a0, 12 +; RV32-NEXT: beqz a0, .LBB65_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB65_2: +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_20_1_z_select_i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 44 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB65_2 +; RV64-NEXT: slli a0, a0, 44 +; RV64-NEXT: beqz a0, .LBB65_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB65_2: 
+; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 1048575 %2 = icmp eq i32 %1, 0 @@ -2000,22 +2000,22 @@ define signext i32 @bit_20_1_z_select_i32(i32 signext %a, i32 signext %b, i32 si define signext i32 @bit_20_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; RV32-LABEL: bit_20_1_nz_select_i32: ; RV32: # %bb.0: -; RV32-NEXT: slli a3, a0, 12 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bnez a3, .LBB66_2 +; RV32-NEXT: slli a0, a0, 12 +; RV32-NEXT: bnez a0, .LBB66_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB66_2: +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_20_1_nz_select_i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 44 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB66_2 +; RV64-NEXT: slli a0, a0, 44 +; RV64-NEXT: bnez a0, .LBB66_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB66_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 1048575 %2 = icmp ne i32 %1, 0 @@ -2026,22 +2026,22 @@ define signext i32 @bit_20_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 s define signext i32 @bit_31_1_z_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; RV32-LABEL: bit_31_1_z_select_i32: ; RV32: # %bb.0: -; RV32-NEXT: slli a3, a0, 1 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: beqz a3, .LBB67_2 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: beqz a0, .LBB67_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB67_2: +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_31_1_z_select_i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 33 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB67_2 +; RV64-NEXT: slli a0, a0, 33 +; RV64-NEXT: beqz a0, .LBB67_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB67_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 2147483647 %2 = icmp eq i32 %1, 0 @@ -2052,22 +2052,22 @@ define signext i32 @bit_31_1_z_select_i32(i32 signext %a, i32 signext %b, i32 si define signext i32 @bit_31_1_nz_select_i32(i32 signext %a, i32 signext %b, i32 signext %c) { ; RV32-LABEL: bit_31_1_nz_select_i32: ; RV32: # %bb.0: -; RV32-NEXT: slli a3, a0, 1 -; RV32-NEXT: mv a0, a1 -; RV32-NEXT: bnez a3, .LBB68_2 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: bnez a0, .LBB68_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a2 +; RV32-NEXT: mv a1, a2 ; RV32-NEXT: .LBB68_2: +; RV32-NEXT: mv a0, a1 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_31_1_nz_select_i32: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 33 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB68_2 +; RV64-NEXT: slli a0, a0, 33 +; RV64-NEXT: bnez a0, .LBB68_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB68_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i32 %a, 2147483647 %2 = icmp ne i32 %1, 0 @@ -2109,23 +2109,23 @@ define i64 @bit_10_1_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_10_1_z_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: andi a3, a0, 1023 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: beqz a3, .LBB71_2 +; RV32-NEXT: andi a0, a0, 1023 +; RV32-NEXT: beqz a0, .LBB71_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB71_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_10_1_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: andi a3, a0, 1023 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB71_2 +; RV64-NEXT: andi a0, a0, 1023 +; RV64-NEXT: beqz a0, .LBB71_2 ; RV64-NEXT: # %bb.1: 
-; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB71_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 1023 %2 = icmp eq i64 %1, 0 @@ -2137,23 +2137,23 @@ define i64 @bit_10_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_10_1_nz_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: andi a3, a0, 1023 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bnez a3, .LBB72_2 +; RV32-NEXT: andi a0, a0, 1023 +; RV32-NEXT: bnez a0, .LBB72_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB72_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_10_1_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: andi a3, a0, 1023 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB72_2 +; RV64-NEXT: andi a0, a0, 1023 +; RV64-NEXT: bnez a0, .LBB72_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB72_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 1023 %2 = icmp ne i64 %1, 0 @@ -2165,23 +2165,23 @@ define i64 @bit_11_1_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_11_1_z_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: andi a3, a0, 2047 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: beqz a3, .LBB73_2 +; RV32-NEXT: andi a0, a0, 2047 +; RV32-NEXT: beqz a0, .LBB73_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB73_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_11_1_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: andi a3, a0, 2047 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB73_2 +; RV64-NEXT: andi a0, a0, 2047 +; RV64-NEXT: beqz a0, .LBB73_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB73_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 2047 %2 = icmp eq i64 %1, 0 @@ -2193,23 +2193,23 @@ define i64 @bit_11_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_11_1_nz_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: andi a3, a0, 2047 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bnez a3, .LBB74_2 +; RV32-NEXT: andi a0, a0, 2047 +; RV32-NEXT: bnez a0, .LBB74_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB74_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_11_1_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: andi a3, a0, 2047 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB74_2 +; RV64-NEXT: andi a0, a0, 2047 +; RV64-NEXT: bnez a0, .LBB74_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB74_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 2047 %2 = icmp ne i64 %1, 0 @@ -2221,23 +2221,23 @@ define i64 @bit_16_1_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_16_1_z_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: slli a3, a0, 16 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: beqz a3, .LBB75_2 +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: beqz a0, .LBB75_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB75_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_16_1_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 48 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB75_2 +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: beqz a0, .LBB75_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB75_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 
65535 %2 = icmp eq i64 %1, 0 @@ -2259,12 +2259,12 @@ define i64 @bit_16_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; ; RV64-LABEL: bit_16_1_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: sext.w a3, a0 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB76_2 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: bnez a0, .LBB76_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB76_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 4294967295 %2 = icmp ne i64 %1, 0 @@ -2277,23 +2277,23 @@ define i64 @bit_20_1_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_20_1_z_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: slli a3, a0, 12 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: beqz a3, .LBB77_2 +; RV32-NEXT: slli a0, a0, 12 +; RV32-NEXT: beqz a0, .LBB77_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB77_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_20_1_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 44 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB77_2 +; RV64-NEXT: slli a0, a0, 44 +; RV64-NEXT: beqz a0, .LBB77_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB77_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 1048575 %2 = icmp eq i64 %1, 0 @@ -2305,23 +2305,23 @@ define i64 @bit_20_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_20_1_nz_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: slli a3, a0, 12 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bnez a3, .LBB78_2 +; RV32-NEXT: slli a0, a0, 12 +; RV32-NEXT: bnez a0, .LBB78_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB78_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_20_1_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 44 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB78_2 +; RV64-NEXT: slli a0, a0, 44 +; RV64-NEXT: bnez a0, .LBB78_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB78_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 1048575 %2 = icmp ne i64 %1, 0 @@ -2333,23 +2333,23 @@ define i64 @bit_31_1_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_31_1_z_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: slli a3, a0, 1 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: beqz a3, .LBB79_2 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: beqz a0, .LBB79_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB79_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_31_1_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 33 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB79_2 +; RV64-NEXT: slli a0, a0, 33 +; RV64-NEXT: beqz a0, .LBB79_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB79_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 2147483647 %2 = icmp eq i64 %1, 0 @@ -2361,23 +2361,23 @@ define i64 @bit_31_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_31_1_nz_select_i64: ; RV32: # %bb.0: ; RV32-NEXT: mv a1, a3 -; RV32-NEXT: slli a3, a0, 1 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bnez a3, .LBB80_2 +; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: bnez a0, .LBB80_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a1, a5 ; RV32-NEXT: .LBB80_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: ret ; ; RV64-LABEL: 
bit_31_1_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 33 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB80_2 +; RV64-NEXT: slli a0, a0, 33 +; RV64-NEXT: bnez a0, .LBB80_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB80_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 2147483647 %2 = icmp ne i64 %1, 0 @@ -2399,12 +2399,12 @@ define i64 @bit_32_1_z_select_i64(i64 %a, i64 %b, i64 %c) { ; ; RV64-LABEL: bit_32_1_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: sext.w a3, a0 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB81_2 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: beqz a0, .LBB81_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB81_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 4294967295 %2 = icmp eq i64 %1, 0 @@ -2426,12 +2426,12 @@ define i64 @bit_32_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; ; RV64-LABEL: bit_32_1_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: sext.w a3, a0 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB82_2 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: bnez a0, .LBB82_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB82_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 4294967295 %2 = icmp ne i64 %1, 0 @@ -2444,24 +2444,24 @@ define i64 @bit_55_1_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32: # %bb.0: ; RV32-NEXT: slli a1, a1, 9 ; RV32-NEXT: srli a1, a1, 9 -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: beqz a1, .LBB83_2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: beqz a0, .LBB83_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a3, a5 ; RV32-NEXT: .LBB83_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: mv a1, a3 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_55_1_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 9 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB83_2 +; RV64-NEXT: slli a0, a0, 9 +; RV64-NEXT: beqz a0, .LBB83_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB83_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 36028797018963967 %2 = icmp eq i64 %1, 0 @@ -2474,24 +2474,24 @@ define i64 @bit_55_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32: # %bb.0: ; RV32-NEXT: slli a1, a1, 9 ; RV32-NEXT: srli a1, a1, 9 -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bnez a1, .LBB84_2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: bnez a0, .LBB84_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a3, a5 ; RV32-NEXT: .LBB84_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: mv a1, a3 ; RV32-NEXT: ret ; ; RV64-LABEL: bit_55_1_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 9 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB84_2 +; RV64-NEXT: slli a0, a0, 9 +; RV64-NEXT: bnez a0, .LBB84_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB84_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %1 = and i64 %a, 36028797018963967 %2 = icmp ne i64 %1, 0 @@ -2504,36 +2504,36 @@ define i64 @bit_63_1_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a1, 1 ; RV32I-NEXT: srli a1, a1, 1 -; RV32I-NEXT: or a1, a0, a1 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: beqz a1, .LBB85_2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: beqz a0, .LBB85_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: .LBB85_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; 
RV32I-NEXT: ret ; ; RV64-LABEL: bit_63_1_z_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 1 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: beqz a3, .LBB85_2 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: beqz a0, .LBB85_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB85_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret ; ; RV32ZBS-LABEL: bit_63_1_z_select_i64: ; RV32ZBS: # %bb.0: ; RV32ZBS-NEXT: bclri a1, a1, 31 -; RV32ZBS-NEXT: or a1, a0, a1 -; RV32ZBS-NEXT: mv a0, a2 -; RV32ZBS-NEXT: beqz a1, .LBB85_2 +; RV32ZBS-NEXT: or a0, a0, a1 +; RV32ZBS-NEXT: beqz a0, .LBB85_2 ; RV32ZBS-NEXT: # %bb.1: -; RV32ZBS-NEXT: mv a0, a4 +; RV32ZBS-NEXT: mv a2, a4 ; RV32ZBS-NEXT: mv a3, a5 ; RV32ZBS-NEXT: .LBB85_2: +; RV32ZBS-NEXT: mv a0, a2 ; RV32ZBS-NEXT: mv a1, a3 ; RV32ZBS-NEXT: ret ; @@ -2541,13 +2541,13 @@ define i64 @bit_63_1_z_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32XTHEADBS: # %bb.0: ; RV32XTHEADBS-NEXT: slli a1, a1, 1 ; RV32XTHEADBS-NEXT: srli a1, a1, 1 -; RV32XTHEADBS-NEXT: or a1, a0, a1 -; RV32XTHEADBS-NEXT: mv a0, a2 -; RV32XTHEADBS-NEXT: beqz a1, .LBB85_2 +; RV32XTHEADBS-NEXT: or a0, a0, a1 +; RV32XTHEADBS-NEXT: beqz a0, .LBB85_2 ; RV32XTHEADBS-NEXT: # %bb.1: -; RV32XTHEADBS-NEXT: mv a0, a4 +; RV32XTHEADBS-NEXT: mv a2, a4 ; RV32XTHEADBS-NEXT: mv a3, a5 ; RV32XTHEADBS-NEXT: .LBB85_2: +; RV32XTHEADBS-NEXT: mv a0, a2 ; RV32XTHEADBS-NEXT: mv a1, a3 ; RV32XTHEADBS-NEXT: ret %1 = and i64 %a, 9223372036854775807 @@ -2561,36 +2561,36 @@ define i64 @bit_63_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32I: # %bb.0: ; RV32I-NEXT: slli a1, a1, 1 ; RV32I-NEXT: srli a1, a1, 1 -; RV32I-NEXT: or a1, a0, a1 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: bnez a1, .LBB86_2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: bnez a0, .LBB86_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: .LBB86_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: ret ; ; RV64-LABEL: bit_63_1_nz_select_i64: ; RV64: # %bb.0: -; RV64-NEXT: slli a3, a0, 1 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bnez a3, .LBB86_2 +; RV64-NEXT: slli a0, a0, 1 +; RV64-NEXT: bnez a0, .LBB86_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB86_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret ; ; RV32ZBS-LABEL: bit_63_1_nz_select_i64: ; RV32ZBS: # %bb.0: ; RV32ZBS-NEXT: bclri a1, a1, 31 -; RV32ZBS-NEXT: or a1, a0, a1 -; RV32ZBS-NEXT: mv a0, a2 -; RV32ZBS-NEXT: bnez a1, .LBB86_2 +; RV32ZBS-NEXT: or a0, a0, a1 +; RV32ZBS-NEXT: bnez a0, .LBB86_2 ; RV32ZBS-NEXT: # %bb.1: -; RV32ZBS-NEXT: mv a0, a4 +; RV32ZBS-NEXT: mv a2, a4 ; RV32ZBS-NEXT: mv a3, a5 ; RV32ZBS-NEXT: .LBB86_2: +; RV32ZBS-NEXT: mv a0, a2 ; RV32ZBS-NEXT: mv a1, a3 ; RV32ZBS-NEXT: ret ; @@ -2598,13 +2598,13 @@ define i64 @bit_63_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32XTHEADBS: # %bb.0: ; RV32XTHEADBS-NEXT: slli a1, a1, 1 ; RV32XTHEADBS-NEXT: srli a1, a1, 1 -; RV32XTHEADBS-NEXT: or a1, a0, a1 -; RV32XTHEADBS-NEXT: mv a0, a2 -; RV32XTHEADBS-NEXT: bnez a1, .LBB86_2 +; RV32XTHEADBS-NEXT: or a0, a0, a1 +; RV32XTHEADBS-NEXT: bnez a0, .LBB86_2 ; RV32XTHEADBS-NEXT: # %bb.1: -; RV32XTHEADBS-NEXT: mv a0, a4 +; RV32XTHEADBS-NEXT: mv a2, a4 ; RV32XTHEADBS-NEXT: mv a3, a5 ; RV32XTHEADBS-NEXT: .LBB86_2: +; RV32XTHEADBS-NEXT: mv a0, a2 ; RV32XTHEADBS-NEXT: mv a1, a3 ; RV32XTHEADBS-NEXT: ret %1 = and i64 %a, 9223372036854775807 @@ -2616,13 +2616,13 @@ define i64 @bit_63_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { define i64 @bit_64_1_z_select_i64(i64 %a, i64 %b, i64 %c) { ; 
RV32-LABEL: bit_64_1_z_select_i64: ; RV32: # %bb.0: -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: beqz a1, .LBB87_2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: beqz a0, .LBB87_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a3, a5 ; RV32-NEXT: .LBB87_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: mv a1, a3 ; RV32-NEXT: ret ; @@ -2643,13 +2643,13 @@ define i64 @bit_64_1_z_select_i64(i64 %a, i64 %b, i64 %c) { define i64 @bit_64_1_nz_select_i64(i64 %a, i64 %b, i64 %c) { ; RV32-LABEL: bit_64_1_nz_select_i64: ; RV32: # %bb.0: -; RV32-NEXT: or a1, a0, a1 -; RV32-NEXT: mv a0, a2 -; RV32-NEXT: bnez a1, .LBB88_2 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: bnez a0, .LBB88_2 ; RV32-NEXT: # %bb.1: -; RV32-NEXT: mv a0, a4 +; RV32-NEXT: mv a2, a4 ; RV32-NEXT: mv a3, a5 ; RV32-NEXT: .LBB88_2: +; RV32-NEXT: mv a0, a2 ; RV32-NEXT: mv a1, a3 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/branch-on-zero.ll b/llvm/test/CodeGen/RISCV/branch-on-zero.ll index 02aeebdeb3775..de325010bb281 100644 --- a/llvm/test/CodeGen/RISCV/branch-on-zero.ll +++ b/llvm/test/CodeGen/RISCV/branch-on-zero.ll @@ -76,8 +76,8 @@ define i32 @test_lshr(i32 %v) { ; RV32-NEXT: .LBB2_1: # %for.body ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: andi a2, a0, 1 -; RV32-NEXT: srli a0, a0, 1 ; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: srli a0, a0, 1 ; RV32-NEXT: bnez a0, .LBB2_1 ; RV32-NEXT: .LBB2_2: # %for.end ; RV32-NEXT: mv a0, a1 @@ -92,8 +92,8 @@ define i32 @test_lshr(i32 %v) { ; RV64-NEXT: .LBB2_2: # %for.body ; RV64-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64-NEXT: andi a2, a0, 1 -; RV64-NEXT: srliw a0, a0, 1 ; RV64-NEXT: addw a1, a1, a2 +; RV64-NEXT: srliw a0, a0, 1 ; RV64-NEXT: bnez a0, .LBB2_2 ; RV64-NEXT: .LBB2_3: # %for.end ; RV64-NEXT: mv a0, a1 @@ -129,9 +129,8 @@ define i32 @test_lshr2(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) { ; RV32-NEXT: lw a3, 0(a1) ; RV32-NEXT: addi a4, a1, 4 ; RV32-NEXT: slli a3, a3, 1 -; RV32-NEXT: addi a1, a0, 4 ; RV32-NEXT: sw a3, 0(a0) -; RV32-NEXT: mv a0, a1 +; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: mv a1, a4 ; RV32-NEXT: bne a4, a2, .LBB3_2 ; RV32-NEXT: .LBB3_3: # %while.end @@ -153,9 +152,8 @@ define i32 @test_lshr2(ptr nocapture %x, ptr nocapture readonly %y, i32 %n) { ; RV64-NEXT: lw a3, 0(a1) ; RV64-NEXT: addi a4, a1, 4 ; RV64-NEXT: slli a3, a3, 1 -; RV64-NEXT: addi a1, a0, 4 ; RV64-NEXT: sw a3, 0(a0) -; RV64-NEXT: mv a0, a1 +; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: mv a1, a4 ; RV64-NEXT: bne a4, a2, .LBB3_2 ; RV64-NEXT: .LBB3_3: # %while.end diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll index 337e9bc5845f9..88ad8e6930287 100644 --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr32s.ll @@ -53,22 +53,22 @@ define void @callee() nounwind { ; ILP32-NEXT: flw fs1, 84(a1) ; ILP32-NEXT: flw fs2, 88(a1) ; ILP32-NEXT: flw fs3, 92(a1) -; ILP32-NEXT: flw fs4, 112(a1) -; ILP32-NEXT: flw fs5, 116(a1) -; ILP32-NEXT: flw fs6, 120(a1) -; ILP32-NEXT: flw fs7, 124(a1) -; ILP32-NEXT: flw fs8, 96(a1) -; ILP32-NEXT: flw fs9, 100(a1) -; ILP32-NEXT: flw fs10, 104(a1) -; ILP32-NEXT: flw fs11, 108(a1) -; ILP32-NEXT: fsw fs7, 124(a1) -; ILP32-NEXT: fsw fs6, 120(a1) -; ILP32-NEXT: fsw fs5, 116(a1) -; ILP32-NEXT: fsw fs4, 112(a1) -; ILP32-NEXT: fsw fs11, 108(a1) -; ILP32-NEXT: fsw fs10, 104(a1) -; ILP32-NEXT: fsw fs9, 100(a1) -; ILP32-NEXT: fsw fs8, 96(a1) +; ILP32-NEXT: flw fs4, 96(a1) +; ILP32-NEXT: flw fs5, 100(a1) +; 
ILP32-NEXT: flw fs6, 104(a1) +; ILP32-NEXT: flw fs7, 108(a1) +; ILP32-NEXT: flw fs8, 112(a1) +; ILP32-NEXT: flw fs9, 116(a1) +; ILP32-NEXT: flw fs10, 120(a1) +; ILP32-NEXT: flw fs11, 124(a1) +; ILP32-NEXT: fsw fs11, 124(a1) +; ILP32-NEXT: fsw fs10, 120(a1) +; ILP32-NEXT: fsw fs9, 116(a1) +; ILP32-NEXT: fsw fs8, 112(a1) +; ILP32-NEXT: fsw fs7, 108(a1) +; ILP32-NEXT: fsw fs6, 104(a1) +; ILP32-NEXT: fsw fs5, 100(a1) +; ILP32-NEXT: fsw fs4, 96(a1) ; ILP32-NEXT: fsw fs3, 92(a1) ; ILP32-NEXT: fsw fs2, 88(a1) ; ILP32-NEXT: fsw fs1, 84(a1) @@ -123,22 +123,22 @@ define void @callee() nounwind { ; ILP32E-NEXT: flw fs1, 84(a1) ; ILP32E-NEXT: flw fs2, 88(a1) ; ILP32E-NEXT: flw fs3, 92(a1) -; ILP32E-NEXT: flw fs4, 112(a1) -; ILP32E-NEXT: flw fs5, 116(a1) -; ILP32E-NEXT: flw fs6, 120(a1) -; ILP32E-NEXT: flw fs7, 124(a1) -; ILP32E-NEXT: flw fs8, 96(a1) -; ILP32E-NEXT: flw fs9, 100(a1) -; ILP32E-NEXT: flw fs10, 104(a1) -; ILP32E-NEXT: flw fs11, 108(a1) -; ILP32E-NEXT: fsw fs7, 124(a1) -; ILP32E-NEXT: fsw fs6, 120(a1) -; ILP32E-NEXT: fsw fs5, 116(a1) -; ILP32E-NEXT: fsw fs4, 112(a1) -; ILP32E-NEXT: fsw fs11, 108(a1) -; ILP32E-NEXT: fsw fs10, 104(a1) -; ILP32E-NEXT: fsw fs9, 100(a1) -; ILP32E-NEXT: fsw fs8, 96(a1) +; ILP32E-NEXT: flw fs4, 96(a1) +; ILP32E-NEXT: flw fs5, 100(a1) +; ILP32E-NEXT: flw fs6, 104(a1) +; ILP32E-NEXT: flw fs7, 108(a1) +; ILP32E-NEXT: flw fs8, 112(a1) +; ILP32E-NEXT: flw fs9, 116(a1) +; ILP32E-NEXT: flw fs10, 120(a1) +; ILP32E-NEXT: flw fs11, 124(a1) +; ILP32E-NEXT: fsw fs11, 124(a1) +; ILP32E-NEXT: fsw fs10, 120(a1) +; ILP32E-NEXT: fsw fs9, 116(a1) +; ILP32E-NEXT: fsw fs8, 112(a1) +; ILP32E-NEXT: fsw fs7, 108(a1) +; ILP32E-NEXT: fsw fs6, 104(a1) +; ILP32E-NEXT: fsw fs5, 100(a1) +; ILP32E-NEXT: fsw fs4, 96(a1) ; ILP32E-NEXT: fsw fs3, 92(a1) ; ILP32E-NEXT: fsw fs2, 88(a1) ; ILP32E-NEXT: fsw fs1, 84(a1) @@ -193,22 +193,22 @@ define void @callee() nounwind { ; LP64-NEXT: flw fs1, 84(a1) ; LP64-NEXT: flw fs2, 88(a1) ; LP64-NEXT: flw fs3, 92(a1) -; LP64-NEXT: flw fs4, 112(a1) -; LP64-NEXT: flw fs5, 116(a1) -; LP64-NEXT: flw fs6, 120(a1) -; LP64-NEXT: flw fs7, 124(a1) -; LP64-NEXT: flw fs8, 96(a1) -; LP64-NEXT: flw fs9, 100(a1) -; LP64-NEXT: flw fs10, 104(a1) -; LP64-NEXT: flw fs11, 108(a1) -; LP64-NEXT: fsw fs7, 124(a1) -; LP64-NEXT: fsw fs6, 120(a1) -; LP64-NEXT: fsw fs5, 116(a1) -; LP64-NEXT: fsw fs4, 112(a1) -; LP64-NEXT: fsw fs11, 108(a1) -; LP64-NEXT: fsw fs10, 104(a1) -; LP64-NEXT: fsw fs9, 100(a1) -; LP64-NEXT: fsw fs8, 96(a1) +; LP64-NEXT: flw fs4, 96(a1) +; LP64-NEXT: flw fs5, 100(a1) +; LP64-NEXT: flw fs6, 104(a1) +; LP64-NEXT: flw fs7, 108(a1) +; LP64-NEXT: flw fs8, 112(a1) +; LP64-NEXT: flw fs9, 116(a1) +; LP64-NEXT: flw fs10, 120(a1) +; LP64-NEXT: flw fs11, 124(a1) +; LP64-NEXT: fsw fs11, 124(a1) +; LP64-NEXT: fsw fs10, 120(a1) +; LP64-NEXT: fsw fs9, 116(a1) +; LP64-NEXT: fsw fs8, 112(a1) +; LP64-NEXT: fsw fs7, 108(a1) +; LP64-NEXT: fsw fs6, 104(a1) +; LP64-NEXT: fsw fs5, 100(a1) +; LP64-NEXT: fsw fs4, 96(a1) ; LP64-NEXT: fsw fs3, 92(a1) ; LP64-NEXT: fsw fs2, 88(a1) ; LP64-NEXT: fsw fs1, 84(a1) @@ -263,22 +263,22 @@ define void @callee() nounwind { ; LP64E-NEXT: flw fs1, 84(a1) ; LP64E-NEXT: flw fs2, 88(a1) ; LP64E-NEXT: flw fs3, 92(a1) -; LP64E-NEXT: flw fs4, 112(a1) -; LP64E-NEXT: flw fs5, 116(a1) -; LP64E-NEXT: flw fs6, 120(a1) -; LP64E-NEXT: flw fs7, 124(a1) -; LP64E-NEXT: flw fs8, 96(a1) -; LP64E-NEXT: flw fs9, 100(a1) -; LP64E-NEXT: flw fs10, 104(a1) -; LP64E-NEXT: flw fs11, 108(a1) -; LP64E-NEXT: fsw fs7, 124(a1) -; LP64E-NEXT: fsw fs6, 120(a1) -; LP64E-NEXT: fsw fs5, 
116(a1) -; LP64E-NEXT: fsw fs4, 112(a1) -; LP64E-NEXT: fsw fs11, 108(a1) -; LP64E-NEXT: fsw fs10, 104(a1) -; LP64E-NEXT: fsw fs9, 100(a1) -; LP64E-NEXT: fsw fs8, 96(a1) +; LP64E-NEXT: flw fs4, 96(a1) +; LP64E-NEXT: flw fs5, 100(a1) +; LP64E-NEXT: flw fs6, 104(a1) +; LP64E-NEXT: flw fs7, 108(a1) +; LP64E-NEXT: flw fs8, 112(a1) +; LP64E-NEXT: flw fs9, 116(a1) +; LP64E-NEXT: flw fs10, 120(a1) +; LP64E-NEXT: flw fs11, 124(a1) +; LP64E-NEXT: fsw fs11, 124(a1) +; LP64E-NEXT: fsw fs10, 120(a1) +; LP64E-NEXT: fsw fs9, 116(a1) +; LP64E-NEXT: fsw fs8, 112(a1) +; LP64E-NEXT: fsw fs7, 108(a1) +; LP64E-NEXT: fsw fs6, 104(a1) +; LP64E-NEXT: fsw fs5, 100(a1) +; LP64E-NEXT: fsw fs4, 96(a1) ; LP64E-NEXT: fsw fs3, 92(a1) ; LP64E-NEXT: fsw fs2, 88(a1) ; LP64E-NEXT: fsw fs1, 84(a1) @@ -346,22 +346,22 @@ define void @callee() nounwind { ; ILP32F-NEXT: flw fs1, 84(a1) ; ILP32F-NEXT: flw fs2, 88(a1) ; ILP32F-NEXT: flw fs3, 92(a1) -; ILP32F-NEXT: flw fs4, 112(a1) -; ILP32F-NEXT: flw fs5, 116(a1) -; ILP32F-NEXT: flw fs6, 120(a1) -; ILP32F-NEXT: flw fs7, 124(a1) -; ILP32F-NEXT: flw fs8, 96(a1) -; ILP32F-NEXT: flw fs9, 100(a1) -; ILP32F-NEXT: flw fs10, 104(a1) -; ILP32F-NEXT: flw fs11, 108(a1) -; ILP32F-NEXT: fsw fs7, 124(a1) -; ILP32F-NEXT: fsw fs6, 120(a1) -; ILP32F-NEXT: fsw fs5, 116(a1) -; ILP32F-NEXT: fsw fs4, 112(a1) -; ILP32F-NEXT: fsw fs11, 108(a1) -; ILP32F-NEXT: fsw fs10, 104(a1) -; ILP32F-NEXT: fsw fs9, 100(a1) -; ILP32F-NEXT: fsw fs8, 96(a1) +; ILP32F-NEXT: flw fs4, 96(a1) +; ILP32F-NEXT: flw fs5, 100(a1) +; ILP32F-NEXT: flw fs6, 104(a1) +; ILP32F-NEXT: flw fs7, 108(a1) +; ILP32F-NEXT: flw fs8, 112(a1) +; ILP32F-NEXT: flw fs9, 116(a1) +; ILP32F-NEXT: flw fs10, 120(a1) +; ILP32F-NEXT: flw fs11, 124(a1) +; ILP32F-NEXT: fsw fs11, 124(a1) +; ILP32F-NEXT: fsw fs10, 120(a1) +; ILP32F-NEXT: fsw fs9, 116(a1) +; ILP32F-NEXT: fsw fs8, 112(a1) +; ILP32F-NEXT: fsw fs7, 108(a1) +; ILP32F-NEXT: fsw fs6, 104(a1) +; ILP32F-NEXT: fsw fs5, 100(a1) +; ILP32F-NEXT: fsw fs4, 96(a1) ; ILP32F-NEXT: fsw fs3, 92(a1) ; ILP32F-NEXT: fsw fs2, 88(a1) ; ILP32F-NEXT: fsw fs1, 84(a1) @@ -442,22 +442,22 @@ define void @callee() nounwind { ; LP64F-NEXT: flw fs1, 84(a1) ; LP64F-NEXT: flw fs2, 88(a1) ; LP64F-NEXT: flw fs3, 92(a1) -; LP64F-NEXT: flw fs4, 112(a1) -; LP64F-NEXT: flw fs5, 116(a1) -; LP64F-NEXT: flw fs6, 120(a1) -; LP64F-NEXT: flw fs7, 124(a1) -; LP64F-NEXT: flw fs8, 96(a1) -; LP64F-NEXT: flw fs9, 100(a1) -; LP64F-NEXT: flw fs10, 104(a1) -; LP64F-NEXT: flw fs11, 108(a1) -; LP64F-NEXT: fsw fs7, 124(a1) -; LP64F-NEXT: fsw fs6, 120(a1) -; LP64F-NEXT: fsw fs5, 116(a1) -; LP64F-NEXT: fsw fs4, 112(a1) -; LP64F-NEXT: fsw fs11, 108(a1) -; LP64F-NEXT: fsw fs10, 104(a1) -; LP64F-NEXT: fsw fs9, 100(a1) -; LP64F-NEXT: fsw fs8, 96(a1) +; LP64F-NEXT: flw fs4, 96(a1) +; LP64F-NEXT: flw fs5, 100(a1) +; LP64F-NEXT: flw fs6, 104(a1) +; LP64F-NEXT: flw fs7, 108(a1) +; LP64F-NEXT: flw fs8, 112(a1) +; LP64F-NEXT: flw fs9, 116(a1) +; LP64F-NEXT: flw fs10, 120(a1) +; LP64F-NEXT: flw fs11, 124(a1) +; LP64F-NEXT: fsw fs11, 124(a1) +; LP64F-NEXT: fsw fs10, 120(a1) +; LP64F-NEXT: fsw fs9, 116(a1) +; LP64F-NEXT: fsw fs8, 112(a1) +; LP64F-NEXT: fsw fs7, 108(a1) +; LP64F-NEXT: fsw fs6, 104(a1) +; LP64F-NEXT: fsw fs5, 100(a1) +; LP64F-NEXT: fsw fs4, 96(a1) ; LP64F-NEXT: fsw fs3, 92(a1) ; LP64F-NEXT: fsw fs2, 88(a1) ; LP64F-NEXT: fsw fs1, 84(a1) @@ -538,22 +538,22 @@ define void @callee() nounwind { ; ILP32D-NEXT: flw fs1, 84(a1) ; ILP32D-NEXT: flw fs2, 88(a1) ; ILP32D-NEXT: flw fs3, 92(a1) -; ILP32D-NEXT: flw fs4, 112(a1) -; ILP32D-NEXT: flw fs5, 116(a1) -; 
ILP32D-NEXT: flw fs6, 120(a1) -; ILP32D-NEXT: flw fs7, 124(a1) -; ILP32D-NEXT: flw fs8, 96(a1) -; ILP32D-NEXT: flw fs9, 100(a1) -; ILP32D-NEXT: flw fs10, 104(a1) -; ILP32D-NEXT: flw fs11, 108(a1) -; ILP32D-NEXT: fsw fs7, 124(a1) -; ILP32D-NEXT: fsw fs6, 120(a1) -; ILP32D-NEXT: fsw fs5, 116(a1) -; ILP32D-NEXT: fsw fs4, 112(a1) -; ILP32D-NEXT: fsw fs11, 108(a1) -; ILP32D-NEXT: fsw fs10, 104(a1) -; ILP32D-NEXT: fsw fs9, 100(a1) -; ILP32D-NEXT: fsw fs8, 96(a1) +; ILP32D-NEXT: flw fs4, 96(a1) +; ILP32D-NEXT: flw fs5, 100(a1) +; ILP32D-NEXT: flw fs6, 104(a1) +; ILP32D-NEXT: flw fs7, 108(a1) +; ILP32D-NEXT: flw fs8, 112(a1) +; ILP32D-NEXT: flw fs9, 116(a1) +; ILP32D-NEXT: flw fs10, 120(a1) +; ILP32D-NEXT: flw fs11, 124(a1) +; ILP32D-NEXT: fsw fs11, 124(a1) +; ILP32D-NEXT: fsw fs10, 120(a1) +; ILP32D-NEXT: fsw fs9, 116(a1) +; ILP32D-NEXT: fsw fs8, 112(a1) +; ILP32D-NEXT: fsw fs7, 108(a1) +; ILP32D-NEXT: fsw fs6, 104(a1) +; ILP32D-NEXT: fsw fs5, 100(a1) +; ILP32D-NEXT: fsw fs4, 96(a1) ; ILP32D-NEXT: fsw fs3, 92(a1) ; ILP32D-NEXT: fsw fs2, 88(a1) ; ILP32D-NEXT: fsw fs1, 84(a1) @@ -634,22 +634,22 @@ define void @callee() nounwind { ; LP64D-NEXT: flw fs1, 84(a1) ; LP64D-NEXT: flw fs2, 88(a1) ; LP64D-NEXT: flw fs3, 92(a1) -; LP64D-NEXT: flw fs4, 112(a1) -; LP64D-NEXT: flw fs5, 116(a1) -; LP64D-NEXT: flw fs6, 120(a1) -; LP64D-NEXT: flw fs7, 124(a1) -; LP64D-NEXT: flw fs8, 96(a1) -; LP64D-NEXT: flw fs9, 100(a1) -; LP64D-NEXT: flw fs10, 104(a1) -; LP64D-NEXT: flw fs11, 108(a1) -; LP64D-NEXT: fsw fs7, 124(a1) -; LP64D-NEXT: fsw fs6, 120(a1) -; LP64D-NEXT: fsw fs5, 116(a1) -; LP64D-NEXT: fsw fs4, 112(a1) -; LP64D-NEXT: fsw fs11, 108(a1) -; LP64D-NEXT: fsw fs10, 104(a1) -; LP64D-NEXT: fsw fs9, 100(a1) -; LP64D-NEXT: fsw fs8, 96(a1) +; LP64D-NEXT: flw fs4, 96(a1) +; LP64D-NEXT: flw fs5, 100(a1) +; LP64D-NEXT: flw fs6, 104(a1) +; LP64D-NEXT: flw fs7, 108(a1) +; LP64D-NEXT: flw fs8, 112(a1) +; LP64D-NEXT: flw fs9, 116(a1) +; LP64D-NEXT: flw fs10, 120(a1) +; LP64D-NEXT: flw fs11, 124(a1) +; LP64D-NEXT: fsw fs11, 124(a1) +; LP64D-NEXT: fsw fs10, 120(a1) +; LP64D-NEXT: fsw fs9, 116(a1) +; LP64D-NEXT: fsw fs8, 112(a1) +; LP64D-NEXT: fsw fs7, 108(a1) +; LP64D-NEXT: fsw fs6, 104(a1) +; LP64D-NEXT: fsw fs5, 100(a1) +; LP64D-NEXT: fsw fs4, 96(a1) ; LP64D-NEXT: fsw fs3, 92(a1) ; LP64D-NEXT: fsw fs2, 88(a1) ; LP64D-NEXT: fsw fs1, 84(a1) diff --git a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll index 0501c700f57df..8a97e77bea55d 100644 --- a/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-fpr64s.ll @@ -45,26 +45,26 @@ define void @callee() nounwind { ; ILP32-NEXT: fld ft11, 152(a1) ; ILP32-NEXT: fld fs0, 160(a1) ; ILP32-NEXT: fld fs1, 168(a1) -; ILP32-NEXT: fld fs2, 208(a1) -; ILP32-NEXT: fld fs3, 216(a1) -; ILP32-NEXT: fld fs4, 224(a1) -; ILP32-NEXT: fld fs5, 232(a1) -; ILP32-NEXT: fld fs6, 240(a1) -; ILP32-NEXT: fld fs7, 248(a1) -; ILP32-NEXT: fld fs8, 176(a1) -; ILP32-NEXT: fld fs9, 184(a1) -; ILP32-NEXT: fld fs10, 192(a1) -; ILP32-NEXT: fld fs11, 200(a1) -; ILP32-NEXT: fsd fs7, 248(a1) -; ILP32-NEXT: fsd fs6, 240(a1) -; ILP32-NEXT: fsd fs5, 232(a1) -; ILP32-NEXT: fsd fs4, 224(a1) -; ILP32-NEXT: fsd fs3, 216(a1) -; ILP32-NEXT: fsd fs2, 208(a1) -; ILP32-NEXT: fsd fs11, 200(a1) -; ILP32-NEXT: fsd fs10, 192(a1) -; ILP32-NEXT: fsd fs9, 184(a1) -; ILP32-NEXT: fsd fs8, 176(a1) +; ILP32-NEXT: fld fs2, 176(a1) +; ILP32-NEXT: fld fs3, 184(a1) +; ILP32-NEXT: fld fs4, 192(a1) +; ILP32-NEXT: fld fs5, 200(a1) +; ILP32-NEXT: fld fs6, 208(a1) 
+; ILP32-NEXT: fld fs7, 216(a1) +; ILP32-NEXT: fld fs8, 224(a1) +; ILP32-NEXT: fld fs9, 232(a1) +; ILP32-NEXT: fld fs10, 240(a1) +; ILP32-NEXT: fld fs11, 248(a1) +; ILP32-NEXT: fsd fs11, 248(a1) +; ILP32-NEXT: fsd fs10, 240(a1) +; ILP32-NEXT: fsd fs9, 232(a1) +; ILP32-NEXT: fsd fs8, 224(a1) +; ILP32-NEXT: fsd fs7, 216(a1) +; ILP32-NEXT: fsd fs6, 208(a1) +; ILP32-NEXT: fsd fs5, 200(a1) +; ILP32-NEXT: fsd fs4, 192(a1) +; ILP32-NEXT: fsd fs3, 184(a1) +; ILP32-NEXT: fsd fs2, 176(a1) ; ILP32-NEXT: fsd fs1, 168(a1) ; ILP32-NEXT: fsd fs0, 160(a1) ; ILP32-NEXT: fsd ft11, 152(a1) @@ -115,26 +115,26 @@ define void @callee() nounwind { ; LP64-NEXT: fld ft11, 152(a1) ; LP64-NEXT: fld fs0, 160(a1) ; LP64-NEXT: fld fs1, 168(a1) -; LP64-NEXT: fld fs2, 208(a1) -; LP64-NEXT: fld fs3, 216(a1) -; LP64-NEXT: fld fs4, 224(a1) -; LP64-NEXT: fld fs5, 232(a1) -; LP64-NEXT: fld fs6, 240(a1) -; LP64-NEXT: fld fs7, 248(a1) -; LP64-NEXT: fld fs8, 176(a1) -; LP64-NEXT: fld fs9, 184(a1) -; LP64-NEXT: fld fs10, 192(a1) -; LP64-NEXT: fld fs11, 200(a1) -; LP64-NEXT: fsd fs7, 248(a1) -; LP64-NEXT: fsd fs6, 240(a1) -; LP64-NEXT: fsd fs5, 232(a1) -; LP64-NEXT: fsd fs4, 224(a1) -; LP64-NEXT: fsd fs3, 216(a1) -; LP64-NEXT: fsd fs2, 208(a1) -; LP64-NEXT: fsd fs11, 200(a1) -; LP64-NEXT: fsd fs10, 192(a1) -; LP64-NEXT: fsd fs9, 184(a1) -; LP64-NEXT: fsd fs8, 176(a1) +; LP64-NEXT: fld fs2, 176(a1) +; LP64-NEXT: fld fs3, 184(a1) +; LP64-NEXT: fld fs4, 192(a1) +; LP64-NEXT: fld fs5, 200(a1) +; LP64-NEXT: fld fs6, 208(a1) +; LP64-NEXT: fld fs7, 216(a1) +; LP64-NEXT: fld fs8, 224(a1) +; LP64-NEXT: fld fs9, 232(a1) +; LP64-NEXT: fld fs10, 240(a1) +; LP64-NEXT: fld fs11, 248(a1) +; LP64-NEXT: fsd fs11, 248(a1) +; LP64-NEXT: fsd fs10, 240(a1) +; LP64-NEXT: fsd fs9, 232(a1) +; LP64-NEXT: fsd fs8, 224(a1) +; LP64-NEXT: fsd fs7, 216(a1) +; LP64-NEXT: fsd fs6, 208(a1) +; LP64-NEXT: fsd fs5, 200(a1) +; LP64-NEXT: fsd fs4, 192(a1) +; LP64-NEXT: fsd fs3, 184(a1) +; LP64-NEXT: fsd fs2, 176(a1) ; LP64-NEXT: fsd fs1, 168(a1) ; LP64-NEXT: fsd fs0, 160(a1) ; LP64-NEXT: fsd ft11, 152(a1) @@ -185,26 +185,26 @@ define void @callee() nounwind { ; LP64E-NEXT: fld ft11, 152(a1) ; LP64E-NEXT: fld fs0, 160(a1) ; LP64E-NEXT: fld fs1, 168(a1) -; LP64E-NEXT: fld fs2, 208(a1) -; LP64E-NEXT: fld fs3, 216(a1) -; LP64E-NEXT: fld fs4, 224(a1) -; LP64E-NEXT: fld fs5, 232(a1) -; LP64E-NEXT: fld fs6, 240(a1) -; LP64E-NEXT: fld fs7, 248(a1) -; LP64E-NEXT: fld fs8, 176(a1) -; LP64E-NEXT: fld fs9, 184(a1) -; LP64E-NEXT: fld fs10, 192(a1) -; LP64E-NEXT: fld fs11, 200(a1) -; LP64E-NEXT: fsd fs7, 248(a1) -; LP64E-NEXT: fsd fs6, 240(a1) -; LP64E-NEXT: fsd fs5, 232(a1) -; LP64E-NEXT: fsd fs4, 224(a1) -; LP64E-NEXT: fsd fs3, 216(a1) -; LP64E-NEXT: fsd fs2, 208(a1) -; LP64E-NEXT: fsd fs11, 200(a1) -; LP64E-NEXT: fsd fs10, 192(a1) -; LP64E-NEXT: fsd fs9, 184(a1) -; LP64E-NEXT: fsd fs8, 176(a1) +; LP64E-NEXT: fld fs2, 176(a1) +; LP64E-NEXT: fld fs3, 184(a1) +; LP64E-NEXT: fld fs4, 192(a1) +; LP64E-NEXT: fld fs5, 200(a1) +; LP64E-NEXT: fld fs6, 208(a1) +; LP64E-NEXT: fld fs7, 216(a1) +; LP64E-NEXT: fld fs8, 224(a1) +; LP64E-NEXT: fld fs9, 232(a1) +; LP64E-NEXT: fld fs10, 240(a1) +; LP64E-NEXT: fld fs11, 248(a1) +; LP64E-NEXT: fsd fs11, 248(a1) +; LP64E-NEXT: fsd fs10, 240(a1) +; LP64E-NEXT: fsd fs9, 232(a1) +; LP64E-NEXT: fsd fs8, 224(a1) +; LP64E-NEXT: fsd fs7, 216(a1) +; LP64E-NEXT: fsd fs6, 208(a1) +; LP64E-NEXT: fsd fs5, 200(a1) +; LP64E-NEXT: fsd fs4, 192(a1) +; LP64E-NEXT: fsd fs3, 184(a1) +; LP64E-NEXT: fsd fs2, 176(a1) ; LP64E-NEXT: fsd fs1, 168(a1) ; LP64E-NEXT: fsd fs0, 
160(a1) ; LP64E-NEXT: fsd ft11, 152(a1) @@ -268,26 +268,26 @@ define void @callee() nounwind { ; ILP32D-NEXT: fld ft11, 152(a1) ; ILP32D-NEXT: fld fs0, 160(a1) ; ILP32D-NEXT: fld fs1, 168(a1) -; ILP32D-NEXT: fld fs2, 208(a1) -; ILP32D-NEXT: fld fs3, 216(a1) -; ILP32D-NEXT: fld fs4, 224(a1) -; ILP32D-NEXT: fld fs5, 232(a1) -; ILP32D-NEXT: fld fs6, 240(a1) -; ILP32D-NEXT: fld fs7, 248(a1) -; ILP32D-NEXT: fld fs8, 176(a1) -; ILP32D-NEXT: fld fs9, 184(a1) -; ILP32D-NEXT: fld fs10, 192(a1) -; ILP32D-NEXT: fld fs11, 200(a1) -; ILP32D-NEXT: fsd fs7, 248(a1) -; ILP32D-NEXT: fsd fs6, 240(a1) -; ILP32D-NEXT: fsd fs5, 232(a1) -; ILP32D-NEXT: fsd fs4, 224(a1) -; ILP32D-NEXT: fsd fs3, 216(a1) -; ILP32D-NEXT: fsd fs2, 208(a1) -; ILP32D-NEXT: fsd fs11, 200(a1) -; ILP32D-NEXT: fsd fs10, 192(a1) -; ILP32D-NEXT: fsd fs9, 184(a1) -; ILP32D-NEXT: fsd fs8, 176(a1) +; ILP32D-NEXT: fld fs2, 176(a1) +; ILP32D-NEXT: fld fs3, 184(a1) +; ILP32D-NEXT: fld fs4, 192(a1) +; ILP32D-NEXT: fld fs5, 200(a1) +; ILP32D-NEXT: fld fs6, 208(a1) +; ILP32D-NEXT: fld fs7, 216(a1) +; ILP32D-NEXT: fld fs8, 224(a1) +; ILP32D-NEXT: fld fs9, 232(a1) +; ILP32D-NEXT: fld fs10, 240(a1) +; ILP32D-NEXT: fld fs11, 248(a1) +; ILP32D-NEXT: fsd fs11, 248(a1) +; ILP32D-NEXT: fsd fs10, 240(a1) +; ILP32D-NEXT: fsd fs9, 232(a1) +; ILP32D-NEXT: fsd fs8, 224(a1) +; ILP32D-NEXT: fsd fs7, 216(a1) +; ILP32D-NEXT: fsd fs6, 208(a1) +; ILP32D-NEXT: fsd fs5, 200(a1) +; ILP32D-NEXT: fsd fs4, 192(a1) +; ILP32D-NEXT: fsd fs3, 184(a1) +; ILP32D-NEXT: fsd fs2, 176(a1) ; ILP32D-NEXT: fsd fs1, 168(a1) ; ILP32D-NEXT: fsd fs0, 160(a1) ; ILP32D-NEXT: fsd ft11, 152(a1) @@ -364,26 +364,26 @@ define void @callee() nounwind { ; LP64D-NEXT: fld ft11, 152(a1) ; LP64D-NEXT: fld fs0, 160(a1) ; LP64D-NEXT: fld fs1, 168(a1) -; LP64D-NEXT: fld fs2, 208(a1) -; LP64D-NEXT: fld fs3, 216(a1) -; LP64D-NEXT: fld fs4, 224(a1) -; LP64D-NEXT: fld fs5, 232(a1) -; LP64D-NEXT: fld fs6, 240(a1) -; LP64D-NEXT: fld fs7, 248(a1) -; LP64D-NEXT: fld fs8, 176(a1) -; LP64D-NEXT: fld fs9, 184(a1) -; LP64D-NEXT: fld fs10, 192(a1) -; LP64D-NEXT: fld fs11, 200(a1) -; LP64D-NEXT: fsd fs7, 248(a1) -; LP64D-NEXT: fsd fs6, 240(a1) -; LP64D-NEXT: fsd fs5, 232(a1) -; LP64D-NEXT: fsd fs4, 224(a1) -; LP64D-NEXT: fsd fs3, 216(a1) -; LP64D-NEXT: fsd fs2, 208(a1) -; LP64D-NEXT: fsd fs11, 200(a1) -; LP64D-NEXT: fsd fs10, 192(a1) -; LP64D-NEXT: fsd fs9, 184(a1) -; LP64D-NEXT: fsd fs8, 176(a1) +; LP64D-NEXT: fld fs2, 176(a1) +; LP64D-NEXT: fld fs3, 184(a1) +; LP64D-NEXT: fld fs4, 192(a1) +; LP64D-NEXT: fld fs5, 200(a1) +; LP64D-NEXT: fld fs6, 208(a1) +; LP64D-NEXT: fld fs7, 216(a1) +; LP64D-NEXT: fld fs8, 224(a1) +; LP64D-NEXT: fld fs9, 232(a1) +; LP64D-NEXT: fld fs10, 240(a1) +; LP64D-NEXT: fld fs11, 248(a1) +; LP64D-NEXT: fsd fs11, 248(a1) +; LP64D-NEXT: fsd fs10, 240(a1) +; LP64D-NEXT: fsd fs9, 232(a1) +; LP64D-NEXT: fsd fs8, 224(a1) +; LP64D-NEXT: fsd fs7, 216(a1) +; LP64D-NEXT: fsd fs6, 208(a1) +; LP64D-NEXT: fsd fs5, 200(a1) +; LP64D-NEXT: fsd fs4, 192(a1) +; LP64D-NEXT: fsd fs3, 184(a1) +; LP64D-NEXT: fsd fs2, 176(a1) ; LP64D-NEXT: fsd fs1, 168(a1) ; LP64D-NEXT: fsd fs0, 160(a1) ; LP64D-NEXT: fsd ft11, 152(a1) diff --git a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll index f9f1ba60a8ac0..53a4b1bafaab6 100644 --- a/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll +++ b/llvm/test/CodeGen/RISCV/callee-saved-gprs.ll @@ -68,16 +68,16 @@ define void @callee() { ; RV32I-NEXT: .cfi_offset s9, -44 ; RV32I-NEXT: .cfi_offset s10, -48 ; RV32I-NEXT: .cfi_offset s11, -52 -; 
RV32I-NEXT: lui a7, %hi(var) -; RV32I-NEXT: lw a0, %lo(var)(a7) +; RV32I-NEXT: lui a4, %hi(var) +; RV32I-NEXT: lw a0, %lo(var)(a4) ; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var+4)(a7) +; RV32I-NEXT: lw a0, %lo(var+4)(a4) ; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var+8)(a7) +; RV32I-NEXT: lw a0, %lo(var+8)(a4) ; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var+12)(a7) +; RV32I-NEXT: lw a0, %lo(var+12)(a4) ; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi a5, a7, %lo(var) +; RV32I-NEXT: addi a5, a4, %lo(var) ; RV32I-NEXT: lw a0, 16(a5) ; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a0, 20(a5) @@ -100,22 +100,22 @@ define void @callee() { ; RV32I-NEXT: lw s8, 84(a5) ; RV32I-NEXT: lw s9, 88(a5) ; RV32I-NEXT: lw s10, 92(a5) -; RV32I-NEXT: lw s11, 112(a5) -; RV32I-NEXT: lw ra, 116(a5) -; RV32I-NEXT: lw a3, 120(a5) -; RV32I-NEXT: lw a0, 124(a5) -; RV32I-NEXT: lw a6, 96(a5) -; RV32I-NEXT: lw a4, 100(a5) -; RV32I-NEXT: lw a2, 104(a5) -; RV32I-NEXT: lw a1, 108(a5) -; RV32I-NEXT: sw a0, 124(a5) -; RV32I-NEXT: sw a3, 120(a5) -; RV32I-NEXT: sw ra, 116(a5) -; RV32I-NEXT: sw s11, 112(a5) -; RV32I-NEXT: sw a1, 108(a5) -; RV32I-NEXT: sw a2, 104(a5) -; RV32I-NEXT: sw a4, 100(a5) -; RV32I-NEXT: sw a6, 96(a5) +; RV32I-NEXT: lw s11, 96(a5) +; RV32I-NEXT: lw ra, 100(a5) +; RV32I-NEXT: lw a6, 104(a5) +; RV32I-NEXT: lw a3, 108(a5) +; RV32I-NEXT: lw a2, 112(a5) +; RV32I-NEXT: lw a1, 116(a5) +; RV32I-NEXT: lw a0, 120(a5) +; RV32I-NEXT: lw a7, 124(a5) +; RV32I-NEXT: sw a7, 124(a5) +; RV32I-NEXT: sw a0, 120(a5) +; RV32I-NEXT: sw a1, 116(a5) +; RV32I-NEXT: sw a2, 112(a5) +; RV32I-NEXT: sw a3, 108(a5) +; RV32I-NEXT: sw a6, 104(a5) +; RV32I-NEXT: sw ra, 100(a5) +; RV32I-NEXT: sw s11, 96(a5) ; RV32I-NEXT: sw s10, 92(a5) ; RV32I-NEXT: sw s9, 88(a5) ; RV32I-NEXT: sw s8, 84(a5) @@ -139,13 +139,13 @@ define void @callee() { ; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: sw a0, 16(a5) ; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var+12)(a7) +; RV32I-NEXT: sw a0, %lo(var+12)(a4) ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var+8)(a7) +; RV32I-NEXT: sw a0, %lo(var+8)(a4) ; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var+4)(a7) +; RV32I-NEXT: sw a0, %lo(var+4)(a4) ; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var)(a7) +; RV32I-NEXT: sw a0, %lo(var)(a4) ; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload @@ -186,16 +186,16 @@ define void @callee() { ; RV32I-ILP32E-NEXT: .cfi_offset ra, -4 ; RV32I-ILP32E-NEXT: .cfi_offset s0, -8 ; RV32I-ILP32E-NEXT: .cfi_offset s1, -12 -; RV32I-ILP32E-NEXT: lui a7, %hi(var) -; RV32I-ILP32E-NEXT: lw a0, %lo(var)(a7) +; RV32I-ILP32E-NEXT: lui a4, %hi(var) +; RV32I-ILP32E-NEXT: lw a0, %lo(var)(a4) ; RV32I-ILP32E-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32I-ILP32E-NEXT: lw a0, %lo(var+4)(a7) +; RV32I-ILP32E-NEXT: lw a0, %lo(var+4)(a4) ; RV32I-ILP32E-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32I-ILP32E-NEXT: lw a0, %lo(var+8)(a7) +; RV32I-ILP32E-NEXT: lw a0, %lo(var+8)(a4) ; RV32I-ILP32E-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32I-ILP32E-NEXT: lw a0, %lo(var+12)(a7) +; RV32I-ILP32E-NEXT: lw a0, %lo(var+12)(a4) ; RV32I-ILP32E-NEXT: sw a0, 8(sp) # 4-byte Folded Spill -; RV32I-ILP32E-NEXT: addi a5, a7, %lo(var) +; 
RV32I-ILP32E-NEXT: addi a5, a4, %lo(var) ; RV32I-ILP32E-NEXT: lw a0, 16(a5) ; RV32I-ILP32E-NEXT: sw a0, 4(sp) # 4-byte Folded Spill ; RV32I-ILP32E-NEXT: lw a0, 20(a5) @@ -218,22 +218,22 @@ define void @callee() { ; RV32I-ILP32E-NEXT: lw s10, 84(a5) ; RV32I-ILP32E-NEXT: lw s11, 88(a5) ; RV32I-ILP32E-NEXT: lw s0, 92(a5) -; RV32I-ILP32E-NEXT: lw s1, 112(a5) -; RV32I-ILP32E-NEXT: lw ra, 116(a5) -; RV32I-ILP32E-NEXT: lw a3, 120(a5) -; RV32I-ILP32E-NEXT: lw a0, 124(a5) -; RV32I-ILP32E-NEXT: lw a6, 96(a5) -; RV32I-ILP32E-NEXT: lw a4, 100(a5) -; RV32I-ILP32E-NEXT: lw a2, 104(a5) -; RV32I-ILP32E-NEXT: lw a1, 108(a5) -; RV32I-ILP32E-NEXT: sw a0, 124(a5) -; RV32I-ILP32E-NEXT: sw a3, 120(a5) -; RV32I-ILP32E-NEXT: sw ra, 116(a5) -; RV32I-ILP32E-NEXT: sw s1, 112(a5) -; RV32I-ILP32E-NEXT: sw a1, 108(a5) -; RV32I-ILP32E-NEXT: sw a2, 104(a5) -; RV32I-ILP32E-NEXT: sw a4, 100(a5) -; RV32I-ILP32E-NEXT: sw a6, 96(a5) +; RV32I-ILP32E-NEXT: lw s1, 96(a5) +; RV32I-ILP32E-NEXT: lw ra, 100(a5) +; RV32I-ILP32E-NEXT: lw a6, 104(a5) +; RV32I-ILP32E-NEXT: lw a3, 108(a5) +; RV32I-ILP32E-NEXT: lw a2, 112(a5) +; RV32I-ILP32E-NEXT: lw a1, 116(a5) +; RV32I-ILP32E-NEXT: lw a0, 120(a5) +; RV32I-ILP32E-NEXT: lw a7, 124(a5) +; RV32I-ILP32E-NEXT: sw a7, 124(a5) +; RV32I-ILP32E-NEXT: sw a0, 120(a5) +; RV32I-ILP32E-NEXT: sw a1, 116(a5) +; RV32I-ILP32E-NEXT: sw a2, 112(a5) +; RV32I-ILP32E-NEXT: sw a3, 108(a5) +; RV32I-ILP32E-NEXT: sw a6, 104(a5) +; RV32I-ILP32E-NEXT: sw ra, 100(a5) +; RV32I-ILP32E-NEXT: sw s1, 96(a5) ; RV32I-ILP32E-NEXT: sw s0, 92(a5) ; RV32I-ILP32E-NEXT: sw s11, 88(a5) ; RV32I-ILP32E-NEXT: sw s10, 84(a5) @@ -257,13 +257,13 @@ define void @callee() { ; RV32I-ILP32E-NEXT: lw a0, 4(sp) # 4-byte Folded Reload ; RV32I-ILP32E-NEXT: sw a0, 16(a5) ; RV32I-ILP32E-NEXT: lw a0, 8(sp) # 4-byte Folded Reload -; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a7) +; RV32I-ILP32E-NEXT: sw a0, %lo(var+12)(a4) ; RV32I-ILP32E-NEXT: lw a0, 12(sp) # 4-byte Folded Reload -; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a7) +; RV32I-ILP32E-NEXT: sw a0, %lo(var+8)(a4) ; RV32I-ILP32E-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a7) +; RV32I-ILP32E-NEXT: sw a0, %lo(var+4)(a4) ; RV32I-ILP32E-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a7) +; RV32I-ILP32E-NEXT: sw a0, %lo(var)(a4) ; RV32I-ILP32E-NEXT: lw ra, 32(sp) # 4-byte Folded Reload ; RV32I-ILP32E-NEXT: lw s0, 28(sp) # 4-byte Folded Reload ; RV32I-ILP32E-NEXT: lw s1, 24(sp) # 4-byte Folded Reload @@ -306,16 +306,16 @@ define void @callee() { ; RV32I-WITH-FP-NEXT: .cfi_offset s11, -52 ; RV32I-WITH-FP-NEXT: addi s0, sp, 80 ; RV32I-WITH-FP-NEXT: .cfi_def_cfa s0, 0 -; RV32I-WITH-FP-NEXT: lui t0, %hi(var) -; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(t0) +; RV32I-WITH-FP-NEXT: lui a4, %hi(var) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var)(a4) ; RV32I-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(t0) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+4)(a4) ; RV32I-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(t0) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+8)(a4) ; RV32I-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(t0) +; RV32I-WITH-FP-NEXT: lw a0, %lo(var+12)(a4) ; RV32I-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill -; RV32I-WITH-FP-NEXT: addi a5, t0, %lo(var) +; RV32I-WITH-FP-NEXT: addi a5, a4, %lo(var) ; RV32I-WITH-FP-NEXT: lw a0, 16(a5) ; RV32I-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill ; RV32I-WITH-FP-NEXT: lw 
a0, 20(a5) @@ -339,22 +339,22 @@ define void @callee() { ; RV32I-WITH-FP-NEXT: lw s9, 84(a5) ; RV32I-WITH-FP-NEXT: lw s10, 88(a5) ; RV32I-WITH-FP-NEXT: lw s11, 92(a5) -; RV32I-WITH-FP-NEXT: lw ra, 112(a5) -; RV32I-WITH-FP-NEXT: lw a4, 116(a5) -; RV32I-WITH-FP-NEXT: lw a3, 120(a5) -; RV32I-WITH-FP-NEXT: lw a0, 124(a5) -; RV32I-WITH-FP-NEXT: lw a7, 96(a5) -; RV32I-WITH-FP-NEXT: lw a6, 100(a5) -; RV32I-WITH-FP-NEXT: lw a2, 104(a5) -; RV32I-WITH-FP-NEXT: lw a1, 108(a5) -; RV32I-WITH-FP-NEXT: sw a0, 124(a5) -; RV32I-WITH-FP-NEXT: sw a3, 120(a5) -; RV32I-WITH-FP-NEXT: sw a4, 116(a5) -; RV32I-WITH-FP-NEXT: sw ra, 112(a5) -; RV32I-WITH-FP-NEXT: sw a1, 108(a5) -; RV32I-WITH-FP-NEXT: sw a2, 104(a5) -; RV32I-WITH-FP-NEXT: sw a6, 100(a5) -; RV32I-WITH-FP-NEXT: sw a7, 96(a5) +; RV32I-WITH-FP-NEXT: lw ra, 96(a5) +; RV32I-WITH-FP-NEXT: lw a7, 100(a5) +; RV32I-WITH-FP-NEXT: lw a6, 104(a5) +; RV32I-WITH-FP-NEXT: lw a3, 108(a5) +; RV32I-WITH-FP-NEXT: lw a2, 112(a5) +; RV32I-WITH-FP-NEXT: lw a1, 116(a5) +; RV32I-WITH-FP-NEXT: lw a0, 120(a5) +; RV32I-WITH-FP-NEXT: lw t0, 124(a5) +; RV32I-WITH-FP-NEXT: sw t0, 124(a5) +; RV32I-WITH-FP-NEXT: sw a0, 120(a5) +; RV32I-WITH-FP-NEXT: sw a1, 116(a5) +; RV32I-WITH-FP-NEXT: sw a2, 112(a5) +; RV32I-WITH-FP-NEXT: sw a3, 108(a5) +; RV32I-WITH-FP-NEXT: sw a6, 104(a5) +; RV32I-WITH-FP-NEXT: sw a7, 100(a5) +; RV32I-WITH-FP-NEXT: sw ra, 96(a5) ; RV32I-WITH-FP-NEXT: sw s11, 92(a5) ; RV32I-WITH-FP-NEXT: sw s10, 88(a5) ; RV32I-WITH-FP-NEXT: sw s9, 84(a5) @@ -379,13 +379,13 @@ define void @callee() { ; RV32I-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload ; RV32I-WITH-FP-NEXT: sw a0, 16(a5) ; RV32I-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(t0) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+12)(a4) ; RV32I-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(t0) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+8)(a4) ; RV32I-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var+4)(t0) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var+4)(a4) ; RV32I-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload -; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(t0) +; RV32I-WITH-FP-NEXT: sw a0, %lo(var)(a4) ; RV32I-WITH-FP-NEXT: .cfi_def_cfa sp, 80 ; RV32I-WITH-FP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32I-WITH-FP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload @@ -434,16 +434,16 @@ define void @callee() { ; RV32IZCMP-NEXT: .cfi_offset s9, -12 ; RV32IZCMP-NEXT: .cfi_offset s10, -8 ; RV32IZCMP-NEXT: .cfi_offset s11, -4 -; RV32IZCMP-NEXT: lui t0, %hi(var) -; RV32IZCMP-NEXT: lw a0, %lo(var)(t0) +; RV32IZCMP-NEXT: lui a4, %hi(var) +; RV32IZCMP-NEXT: lw a0, %lo(var)(a4) ; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var+4)(t0) +; RV32IZCMP-NEXT: lw a0, %lo(var+4)(a4) ; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var+8)(t0) +; RV32IZCMP-NEXT: lw a0, %lo(var+8)(a4) ; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var+12)(t0) +; RV32IZCMP-NEXT: lw a0, %lo(var+12)(a4) ; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: addi a5, t0, %lo(var) +; RV32IZCMP-NEXT: addi a5, a4, %lo(var) ; RV32IZCMP-NEXT: lw a0, 16(a5) ; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; RV32IZCMP-NEXT: lw a0, 20(a5) @@ -463,28 +463,28 @@ define void @callee() { ; RV32IZCMP-NEXT: lw s11, 72(a5) ; RV32IZCMP-NEXT: lw ra, 76(a5) ; RV32IZCMP-NEXT: lw s1, 80(a5) -; RV32IZCMP-NEXT: lw t3, 84(a5) -; 
RV32IZCMP-NEXT: lw t2, 88(a5) -; RV32IZCMP-NEXT: lw t1, 92(a5) -; RV32IZCMP-NEXT: lw a7, 112(a5) -; RV32IZCMP-NEXT: lw s0, 116(a5) -; RV32IZCMP-NEXT: lw a3, 120(a5) -; RV32IZCMP-NEXT: lw a0, 124(a5) -; RV32IZCMP-NEXT: lw a6, 96(a5) -; RV32IZCMP-NEXT: lw a4, 100(a5) -; RV32IZCMP-NEXT: lw a2, 104(a5) -; RV32IZCMP-NEXT: lw a1, 108(a5) -; RV32IZCMP-NEXT: sw a0, 124(a5) -; RV32IZCMP-NEXT: sw a3, 120(a5) -; RV32IZCMP-NEXT: sw s0, 116(a5) -; RV32IZCMP-NEXT: sw a7, 112(a5) -; RV32IZCMP-NEXT: sw a1, 108(a5) -; RV32IZCMP-NEXT: sw a2, 104(a5) -; RV32IZCMP-NEXT: sw a4, 100(a5) -; RV32IZCMP-NEXT: sw a6, 96(a5) -; RV32IZCMP-NEXT: sw t1, 92(a5) -; RV32IZCMP-NEXT: sw t2, 88(a5) -; RV32IZCMP-NEXT: sw t3, 84(a5) +; RV32IZCMP-NEXT: lw t2, 84(a5) +; RV32IZCMP-NEXT: lw t1, 88(a5) +; RV32IZCMP-NEXT: lw t0, 92(a5) +; RV32IZCMP-NEXT: lw a7, 96(a5) +; RV32IZCMP-NEXT: lw s0, 100(a5) +; RV32IZCMP-NEXT: lw a6, 104(a5) +; RV32IZCMP-NEXT: lw a3, 108(a5) +; RV32IZCMP-NEXT: lw a2, 112(a5) +; RV32IZCMP-NEXT: lw a1, 116(a5) +; RV32IZCMP-NEXT: lw a0, 120(a5) +; RV32IZCMP-NEXT: lw t3, 124(a5) +; RV32IZCMP-NEXT: sw t3, 124(a5) +; RV32IZCMP-NEXT: sw a0, 120(a5) +; RV32IZCMP-NEXT: sw a1, 116(a5) +; RV32IZCMP-NEXT: sw a2, 112(a5) +; RV32IZCMP-NEXT: sw a3, 108(a5) +; RV32IZCMP-NEXT: sw a6, 104(a5) +; RV32IZCMP-NEXT: sw s0, 100(a5) +; RV32IZCMP-NEXT: sw a7, 96(a5) +; RV32IZCMP-NEXT: sw t0, 92(a5) +; RV32IZCMP-NEXT: sw t1, 88(a5) +; RV32IZCMP-NEXT: sw t2, 84(a5) ; RV32IZCMP-NEXT: sw s1, 80(a5) ; RV32IZCMP-NEXT: sw ra, 76(a5) ; RV32IZCMP-NEXT: sw s11, 72(a5) @@ -505,13 +505,13 @@ define void @callee() { ; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: sw a0, 16(a5) ; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var+12)(t0) +; RV32IZCMP-NEXT: sw a0, %lo(var+12)(a4) ; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var+8)(t0) +; RV32IZCMP-NEXT: sw a0, %lo(var+8)(a4) ; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var+4)(t0) +; RV32IZCMP-NEXT: sw a0, %lo(var+4)(a4) ; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var)(t0) +; RV32IZCMP-NEXT: sw a0, %lo(var)(a4) ; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 80 ; ; RV32IZCMP-WITH-FP-LABEL: callee: @@ -546,16 +546,16 @@ define void @callee() { ; RV32IZCMP-WITH-FP-NEXT: .cfi_offset s11, -52 ; RV32IZCMP-WITH-FP-NEXT: addi s0, sp, 80 ; RV32IZCMP-WITH-FP-NEXT: .cfi_def_cfa s0, 0 -; RV32IZCMP-WITH-FP-NEXT: lui t1, %hi(var) -; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(t1) +; RV32IZCMP-WITH-FP-NEXT: lui a4, %hi(var) +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(a4) ; RV32IZCMP-WITH-FP-NEXT: sw a0, -56(s0) # 4-byte Folded Spill -; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(t1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(a4) ; RV32IZCMP-WITH-FP-NEXT: sw a0, -60(s0) # 4-byte Folded Spill -; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(t1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(a4) ; RV32IZCMP-WITH-FP-NEXT: sw a0, -64(s0) # 4-byte Folded Spill -; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(t1) +; RV32IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(a4) ; RV32IZCMP-WITH-FP-NEXT: sw a0, -68(s0) # 4-byte Folded Spill -; RV32IZCMP-WITH-FP-NEXT: addi a5, t1, %lo(var) +; RV32IZCMP-WITH-FP-NEXT: addi a5, a4, %lo(var) ; RV32IZCMP-WITH-FP-NEXT: lw a0, 16(a5) ; RV32IZCMP-WITH-FP-NEXT: sw a0, -72(s0) # 4-byte Folded Spill ; RV32IZCMP-WITH-FP-NEXT: lw a0, 20(a5) @@ -575,30 +575,30 @@ define void @callee() { ; RV32IZCMP-WITH-FP-NEXT: lw s10, 68(a5) ; 
RV32IZCMP-WITH-FP-NEXT: lw s11, 72(a5) ; RV32IZCMP-WITH-FP-NEXT: lw ra, 76(a5) -; RV32IZCMP-WITH-FP-NEXT: lw t4, 80(a5) -; RV32IZCMP-WITH-FP-NEXT: lw t3, 84(a5) -; RV32IZCMP-WITH-FP-NEXT: lw t2, 88(a5) +; RV32IZCMP-WITH-FP-NEXT: lw t3, 80(a5) +; RV32IZCMP-WITH-FP-NEXT: lw t2, 84(a5) +; RV32IZCMP-WITH-FP-NEXT: lw t1, 88(a5) ; RV32IZCMP-WITH-FP-NEXT: lw s1, 92(a5) -; RV32IZCMP-WITH-FP-NEXT: lw t0, 112(a5) -; RV32IZCMP-WITH-FP-NEXT: lw a4, 116(a5) -; RV32IZCMP-WITH-FP-NEXT: lw a3, 120(a5) -; RV32IZCMP-WITH-FP-NEXT: lw a0, 124(a5) -; RV32IZCMP-WITH-FP-NEXT: lw a7, 96(a5) -; RV32IZCMP-WITH-FP-NEXT: lw a6, 100(a5) -; RV32IZCMP-WITH-FP-NEXT: lw a2, 104(a5) -; RV32IZCMP-WITH-FP-NEXT: lw a1, 108(a5) -; RV32IZCMP-WITH-FP-NEXT: sw a0, 124(a5) -; RV32IZCMP-WITH-FP-NEXT: sw a3, 120(a5) -; RV32IZCMP-WITH-FP-NEXT: sw a4, 116(a5) -; RV32IZCMP-WITH-FP-NEXT: sw t0, 112(a5) -; RV32IZCMP-WITH-FP-NEXT: sw a1, 108(a5) -; RV32IZCMP-WITH-FP-NEXT: sw a2, 104(a5) -; RV32IZCMP-WITH-FP-NEXT: sw a6, 100(a5) -; RV32IZCMP-WITH-FP-NEXT: sw a7, 96(a5) +; RV32IZCMP-WITH-FP-NEXT: lw t0, 96(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a7, 100(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a6, 104(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a3, 108(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a2, 112(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a1, 116(a5) +; RV32IZCMP-WITH-FP-NEXT: lw a0, 120(a5) +; RV32IZCMP-WITH-FP-NEXT: lw t4, 124(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t4, 124(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a0, 120(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a1, 116(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a2, 112(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a3, 108(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a6, 104(a5) +; RV32IZCMP-WITH-FP-NEXT: sw a7, 100(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t0, 96(a5) ; RV32IZCMP-WITH-FP-NEXT: sw s1, 92(a5) -; RV32IZCMP-WITH-FP-NEXT: sw t2, 88(a5) -; RV32IZCMP-WITH-FP-NEXT: sw t3, 84(a5) -; RV32IZCMP-WITH-FP-NEXT: sw t4, 80(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t1, 88(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t2, 84(a5) +; RV32IZCMP-WITH-FP-NEXT: sw t3, 80(a5) ; RV32IZCMP-WITH-FP-NEXT: sw ra, 76(a5) ; RV32IZCMP-WITH-FP-NEXT: sw s11, 72(a5) ; RV32IZCMP-WITH-FP-NEXT: sw s10, 68(a5) @@ -619,13 +619,13 @@ define void @callee() { ; RV32IZCMP-WITH-FP-NEXT: lw a0, -72(s0) # 4-byte Folded Reload ; RV32IZCMP-WITH-FP-NEXT: sw a0, 16(a5) ; RV32IZCMP-WITH-FP-NEXT: lw a0, -68(s0) # 4-byte Folded Reload -; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(t1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(a4) ; RV32IZCMP-WITH-FP-NEXT: lw a0, -64(s0) # 4-byte Folded Reload -; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(t1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(a4) ; RV32IZCMP-WITH-FP-NEXT: lw a0, -60(s0) # 4-byte Folded Reload -; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(t1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(a4) ; RV32IZCMP-WITH-FP-NEXT: lw a0, -56(s0) # 4-byte Folded Reload -; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(t1) +; RV32IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(a4) ; RV32IZCMP-WITH-FP-NEXT: .cfi_def_cfa sp, 80 ; RV32IZCMP-WITH-FP-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32IZCMP-WITH-FP-NEXT: lw s0, 72(sp) # 4-byte Folded Reload @@ -687,16 +687,16 @@ define void @callee() { ; RV64I-NEXT: .cfi_offset s9, -88 ; RV64I-NEXT: .cfi_offset s10, -96 ; RV64I-NEXT: .cfi_offset s11, -104 -; RV64I-NEXT: lui a7, %hi(var) -; RV64I-NEXT: lw a0, %lo(var)(a7) +; RV64I-NEXT: lui a4, %hi(var) +; RV64I-NEXT: lw a0, %lo(var)(a4) ; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var+4)(a7) +; RV64I-NEXT: lw a0, %lo(var+4)(a4) ; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: 
lw a0, %lo(var+8)(a7) +; RV64I-NEXT: lw a0, %lo(var+8)(a4) ; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var+12)(a7) +; RV64I-NEXT: lw a0, %lo(var+12)(a4) ; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi a5, a7, %lo(var) +; RV64I-NEXT: addi a5, a4, %lo(var) ; RV64I-NEXT: lw a0, 16(a5) ; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, 20(a5) @@ -719,22 +719,22 @@ define void @callee() { ; RV64I-NEXT: lw s8, 84(a5) ; RV64I-NEXT: lw s9, 88(a5) ; RV64I-NEXT: lw s10, 92(a5) -; RV64I-NEXT: lw s11, 112(a5) -; RV64I-NEXT: lw ra, 116(a5) -; RV64I-NEXT: lw a3, 120(a5) -; RV64I-NEXT: lw a0, 124(a5) -; RV64I-NEXT: lw a6, 96(a5) -; RV64I-NEXT: lw a4, 100(a5) -; RV64I-NEXT: lw a2, 104(a5) -; RV64I-NEXT: lw a1, 108(a5) -; RV64I-NEXT: sw a0, 124(a5) -; RV64I-NEXT: sw a3, 120(a5) -; RV64I-NEXT: sw ra, 116(a5) -; RV64I-NEXT: sw s11, 112(a5) -; RV64I-NEXT: sw a1, 108(a5) -; RV64I-NEXT: sw a2, 104(a5) -; RV64I-NEXT: sw a4, 100(a5) -; RV64I-NEXT: sw a6, 96(a5) +; RV64I-NEXT: lw s11, 96(a5) +; RV64I-NEXT: lw ra, 100(a5) +; RV64I-NEXT: lw a6, 104(a5) +; RV64I-NEXT: lw a3, 108(a5) +; RV64I-NEXT: lw a2, 112(a5) +; RV64I-NEXT: lw a1, 116(a5) +; RV64I-NEXT: lw a0, 120(a5) +; RV64I-NEXT: lw a7, 124(a5) +; RV64I-NEXT: sw a7, 124(a5) +; RV64I-NEXT: sw a0, 120(a5) +; RV64I-NEXT: sw a1, 116(a5) +; RV64I-NEXT: sw a2, 112(a5) +; RV64I-NEXT: sw a3, 108(a5) +; RV64I-NEXT: sw a6, 104(a5) +; RV64I-NEXT: sw ra, 100(a5) +; RV64I-NEXT: sw s11, 96(a5) ; RV64I-NEXT: sw s10, 92(a5) ; RV64I-NEXT: sw s9, 88(a5) ; RV64I-NEXT: sw s8, 84(a5) @@ -758,13 +758,13 @@ define void @callee() { ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: sw a0, 16(a5) ; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var+12)(a7) +; RV64I-NEXT: sw a0, %lo(var+12)(a4) ; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var+8)(a7) +; RV64I-NEXT: sw a0, %lo(var+8)(a4) ; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var+4)(a7) +; RV64I-NEXT: sw a0, %lo(var+4)(a4) ; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var)(a7) +; RV64I-NEXT: sw a0, %lo(var)(a4) ; RV64I-NEXT: ld ra, 152(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 144(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 136(sp) # 8-byte Folded Reload @@ -805,16 +805,16 @@ define void @callee() { ; RV64I-LP64E-NEXT: .cfi_offset ra, -8 ; RV64I-LP64E-NEXT: .cfi_offset s0, -16 ; RV64I-LP64E-NEXT: .cfi_offset s1, -24 -; RV64I-LP64E-NEXT: lui a7, %hi(var) -; RV64I-LP64E-NEXT: lw a0, %lo(var)(a7) +; RV64I-LP64E-NEXT: lui a4, %hi(var) +; RV64I-LP64E-NEXT: lw a0, %lo(var)(a4) ; RV64I-LP64E-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64I-LP64E-NEXT: lw a0, %lo(var+4)(a7) +; RV64I-LP64E-NEXT: lw a0, %lo(var+4)(a4) ; RV64I-LP64E-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64I-LP64E-NEXT: lw a0, %lo(var+8)(a7) +; RV64I-LP64E-NEXT: lw a0, %lo(var+8)(a4) ; RV64I-LP64E-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64I-LP64E-NEXT: lw a0, %lo(var+12)(a7) +; RV64I-LP64E-NEXT: lw a0, %lo(var+12)(a4) ; RV64I-LP64E-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64I-LP64E-NEXT: addi a5, a7, %lo(var) +; RV64I-LP64E-NEXT: addi a5, a4, %lo(var) ; RV64I-LP64E-NEXT: lw a0, 16(a5) ; RV64I-LP64E-NEXT: sd a0, 8(sp) # 8-byte Folded Spill ; RV64I-LP64E-NEXT: lw a0, 20(a5) @@ -837,22 +837,22 @@ define void @callee() { ; RV64I-LP64E-NEXT: lw s10, 84(a5) ; RV64I-LP64E-NEXT: lw s11, 88(a5) ; RV64I-LP64E-NEXT: lw s0, 92(a5) -; 
RV64I-LP64E-NEXT: lw s1, 112(a5) -; RV64I-LP64E-NEXT: lw ra, 116(a5) -; RV64I-LP64E-NEXT: lw a3, 120(a5) -; RV64I-LP64E-NEXT: lw a0, 124(a5) -; RV64I-LP64E-NEXT: lw a6, 96(a5) -; RV64I-LP64E-NEXT: lw a4, 100(a5) -; RV64I-LP64E-NEXT: lw a2, 104(a5) -; RV64I-LP64E-NEXT: lw a1, 108(a5) -; RV64I-LP64E-NEXT: sw a0, 124(a5) -; RV64I-LP64E-NEXT: sw a3, 120(a5) -; RV64I-LP64E-NEXT: sw ra, 116(a5) -; RV64I-LP64E-NEXT: sw s1, 112(a5) -; RV64I-LP64E-NEXT: sw a1, 108(a5) -; RV64I-LP64E-NEXT: sw a2, 104(a5) -; RV64I-LP64E-NEXT: sw a4, 100(a5) -; RV64I-LP64E-NEXT: sw a6, 96(a5) +; RV64I-LP64E-NEXT: lw s1, 96(a5) +; RV64I-LP64E-NEXT: lw ra, 100(a5) +; RV64I-LP64E-NEXT: lw a6, 104(a5) +; RV64I-LP64E-NEXT: lw a3, 108(a5) +; RV64I-LP64E-NEXT: lw a2, 112(a5) +; RV64I-LP64E-NEXT: lw a1, 116(a5) +; RV64I-LP64E-NEXT: lw a0, 120(a5) +; RV64I-LP64E-NEXT: lw a7, 124(a5) +; RV64I-LP64E-NEXT: sw a7, 124(a5) +; RV64I-LP64E-NEXT: sw a0, 120(a5) +; RV64I-LP64E-NEXT: sw a1, 116(a5) +; RV64I-LP64E-NEXT: sw a2, 112(a5) +; RV64I-LP64E-NEXT: sw a3, 108(a5) +; RV64I-LP64E-NEXT: sw a6, 104(a5) +; RV64I-LP64E-NEXT: sw ra, 100(a5) +; RV64I-LP64E-NEXT: sw s1, 96(a5) ; RV64I-LP64E-NEXT: sw s0, 92(a5) ; RV64I-LP64E-NEXT: sw s11, 88(a5) ; RV64I-LP64E-NEXT: sw s10, 84(a5) @@ -876,13 +876,13 @@ define void @callee() { ; RV64I-LP64E-NEXT: ld a0, 8(sp) # 8-byte Folded Reload ; RV64I-LP64E-NEXT: sw a0, 16(a5) ; RV64I-LP64E-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; RV64I-LP64E-NEXT: sw a0, %lo(var+12)(a7) +; RV64I-LP64E-NEXT: sw a0, %lo(var+12)(a4) ; RV64I-LP64E-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64I-LP64E-NEXT: sw a0, %lo(var+8)(a7) +; RV64I-LP64E-NEXT: sw a0, %lo(var+8)(a4) ; RV64I-LP64E-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64I-LP64E-NEXT: sw a0, %lo(var+4)(a7) +; RV64I-LP64E-NEXT: sw a0, %lo(var+4)(a4) ; RV64I-LP64E-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64I-LP64E-NEXT: sw a0, %lo(var)(a7) +; RV64I-LP64E-NEXT: sw a0, %lo(var)(a4) ; RV64I-LP64E-NEXT: ld ra, 64(sp) # 8-byte Folded Reload ; RV64I-LP64E-NEXT: ld s0, 56(sp) # 8-byte Folded Reload ; RV64I-LP64E-NEXT: ld s1, 48(sp) # 8-byte Folded Reload @@ -925,16 +925,16 @@ define void @callee() { ; RV64I-WITH-FP-NEXT: .cfi_offset s11, -104 ; RV64I-WITH-FP-NEXT: addi s0, sp, 160 ; RV64I-WITH-FP-NEXT: .cfi_def_cfa s0, 0 -; RV64I-WITH-FP-NEXT: lui t0, %hi(var) -; RV64I-WITH-FP-NEXT: lw a0, %lo(var)(t0) +; RV64I-WITH-FP-NEXT: lui a4, %hi(var) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var)(a4) ; RV64I-WITH-FP-NEXT: sd a0, -112(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, %lo(var+4)(t0) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+4)(a4) ; RV64I-WITH-FP-NEXT: sd a0, -120(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, %lo(var+8)(t0) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+8)(a4) ; RV64I-WITH-FP-NEXT: sd a0, -128(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: lw a0, %lo(var+12)(t0) +; RV64I-WITH-FP-NEXT: lw a0, %lo(var+12)(a4) ; RV64I-WITH-FP-NEXT: sd a0, -136(s0) # 8-byte Folded Spill -; RV64I-WITH-FP-NEXT: addi a5, t0, %lo(var) +; RV64I-WITH-FP-NEXT: addi a5, a4, %lo(var) ; RV64I-WITH-FP-NEXT: lw a0, 16(a5) ; RV64I-WITH-FP-NEXT: sd a0, -144(s0) # 8-byte Folded Spill ; RV64I-WITH-FP-NEXT: lw a0, 20(a5) @@ -958,22 +958,22 @@ define void @callee() { ; RV64I-WITH-FP-NEXT: lw s9, 84(a5) ; RV64I-WITH-FP-NEXT: lw s10, 88(a5) ; RV64I-WITH-FP-NEXT: lw s11, 92(a5) -; RV64I-WITH-FP-NEXT: lw ra, 112(a5) -; RV64I-WITH-FP-NEXT: lw a4, 116(a5) -; RV64I-WITH-FP-NEXT: lw a3, 120(a5) -; RV64I-WITH-FP-NEXT: lw a0, 124(a5) -; RV64I-WITH-FP-NEXT: lw a7, 96(a5) -; 
RV64I-WITH-FP-NEXT: lw a6, 100(a5) -; RV64I-WITH-FP-NEXT: lw a2, 104(a5) -; RV64I-WITH-FP-NEXT: lw a1, 108(a5) -; RV64I-WITH-FP-NEXT: sw a0, 124(a5) -; RV64I-WITH-FP-NEXT: sw a3, 120(a5) -; RV64I-WITH-FP-NEXT: sw a4, 116(a5) -; RV64I-WITH-FP-NEXT: sw ra, 112(a5) -; RV64I-WITH-FP-NEXT: sw a1, 108(a5) -; RV64I-WITH-FP-NEXT: sw a2, 104(a5) -; RV64I-WITH-FP-NEXT: sw a6, 100(a5) -; RV64I-WITH-FP-NEXT: sw a7, 96(a5) +; RV64I-WITH-FP-NEXT: lw ra, 96(a5) +; RV64I-WITH-FP-NEXT: lw a7, 100(a5) +; RV64I-WITH-FP-NEXT: lw a6, 104(a5) +; RV64I-WITH-FP-NEXT: lw a3, 108(a5) +; RV64I-WITH-FP-NEXT: lw a2, 112(a5) +; RV64I-WITH-FP-NEXT: lw a1, 116(a5) +; RV64I-WITH-FP-NEXT: lw a0, 120(a5) +; RV64I-WITH-FP-NEXT: lw t0, 124(a5) +; RV64I-WITH-FP-NEXT: sw t0, 124(a5) +; RV64I-WITH-FP-NEXT: sw a0, 120(a5) +; RV64I-WITH-FP-NEXT: sw a1, 116(a5) +; RV64I-WITH-FP-NEXT: sw a2, 112(a5) +; RV64I-WITH-FP-NEXT: sw a3, 108(a5) +; RV64I-WITH-FP-NEXT: sw a6, 104(a5) +; RV64I-WITH-FP-NEXT: sw a7, 100(a5) +; RV64I-WITH-FP-NEXT: sw ra, 96(a5) ; RV64I-WITH-FP-NEXT: sw s11, 92(a5) ; RV64I-WITH-FP-NEXT: sw s10, 88(a5) ; RV64I-WITH-FP-NEXT: sw s9, 84(a5) @@ -998,13 +998,13 @@ define void @callee() { ; RV64I-WITH-FP-NEXT: ld a0, -144(s0) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: sw a0, 16(a5) ; RV64I-WITH-FP-NEXT: ld a0, -136(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var+12)(t0) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+12)(a4) ; RV64I-WITH-FP-NEXT: ld a0, -128(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var+8)(t0) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+8)(a4) ; RV64I-WITH-FP-NEXT: ld a0, -120(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var+4)(t0) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var+4)(a4) ; RV64I-WITH-FP-NEXT: ld a0, -112(s0) # 8-byte Folded Reload -; RV64I-WITH-FP-NEXT: sw a0, %lo(var)(t0) +; RV64I-WITH-FP-NEXT: sw a0, %lo(var)(a4) ; RV64I-WITH-FP-NEXT: .cfi_def_cfa sp, 160 ; RV64I-WITH-FP-NEXT: ld ra, 152(sp) # 8-byte Folded Reload ; RV64I-WITH-FP-NEXT: ld s0, 144(sp) # 8-byte Folded Reload @@ -1053,16 +1053,16 @@ define void @callee() { ; RV64IZCMP-NEXT: .cfi_offset s9, -24 ; RV64IZCMP-NEXT: .cfi_offset s10, -16 ; RV64IZCMP-NEXT: .cfi_offset s11, -8 -; RV64IZCMP-NEXT: lui t0, %hi(var) -; RV64IZCMP-NEXT: lw a0, %lo(var)(t0) +; RV64IZCMP-NEXT: lui a4, %hi(var) +; RV64IZCMP-NEXT: lw a0, %lo(var)(a4) ; RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var+4)(t0) +; RV64IZCMP-NEXT: lw a0, %lo(var+4)(a4) ; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var+8)(t0) +; RV64IZCMP-NEXT: lw a0, %lo(var+8)(a4) ; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var+12)(t0) +; RV64IZCMP-NEXT: lw a0, %lo(var+12)(a4) ; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: addi a5, t0, %lo(var) +; RV64IZCMP-NEXT: addi a5, a4, %lo(var) ; RV64IZCMP-NEXT: lw a0, 16(a5) ; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; RV64IZCMP-NEXT: lw a0, 20(a5) @@ -1082,28 +1082,28 @@ define void @callee() { ; RV64IZCMP-NEXT: lw s11, 72(a5) ; RV64IZCMP-NEXT: lw ra, 76(a5) ; RV64IZCMP-NEXT: lw s1, 80(a5) -; RV64IZCMP-NEXT: lw t3, 84(a5) -; RV64IZCMP-NEXT: lw t2, 88(a5) -; RV64IZCMP-NEXT: lw t1, 92(a5) -; RV64IZCMP-NEXT: lw a7, 112(a5) -; RV64IZCMP-NEXT: lw s0, 116(a5) -; RV64IZCMP-NEXT: lw a3, 120(a5) -; RV64IZCMP-NEXT: lw a0, 124(a5) -; RV64IZCMP-NEXT: lw a6, 96(a5) -; RV64IZCMP-NEXT: lw a4, 100(a5) -; RV64IZCMP-NEXT: lw a2, 104(a5) -; RV64IZCMP-NEXT: lw a1, 108(a5) -; RV64IZCMP-NEXT: sw 
a0, 124(a5) -; RV64IZCMP-NEXT: sw a3, 120(a5) -; RV64IZCMP-NEXT: sw s0, 116(a5) -; RV64IZCMP-NEXT: sw a7, 112(a5) -; RV64IZCMP-NEXT: sw a1, 108(a5) -; RV64IZCMP-NEXT: sw a2, 104(a5) -; RV64IZCMP-NEXT: sw a4, 100(a5) -; RV64IZCMP-NEXT: sw a6, 96(a5) -; RV64IZCMP-NEXT: sw t1, 92(a5) -; RV64IZCMP-NEXT: sw t2, 88(a5) -; RV64IZCMP-NEXT: sw t3, 84(a5) +; RV64IZCMP-NEXT: lw t2, 84(a5) +; RV64IZCMP-NEXT: lw t1, 88(a5) +; RV64IZCMP-NEXT: lw t0, 92(a5) +; RV64IZCMP-NEXT: lw a7, 96(a5) +; RV64IZCMP-NEXT: lw s0, 100(a5) +; RV64IZCMP-NEXT: lw a6, 104(a5) +; RV64IZCMP-NEXT: lw a3, 108(a5) +; RV64IZCMP-NEXT: lw a2, 112(a5) +; RV64IZCMP-NEXT: lw a1, 116(a5) +; RV64IZCMP-NEXT: lw a0, 120(a5) +; RV64IZCMP-NEXT: lw t3, 124(a5) +; RV64IZCMP-NEXT: sw t3, 124(a5) +; RV64IZCMP-NEXT: sw a0, 120(a5) +; RV64IZCMP-NEXT: sw a1, 116(a5) +; RV64IZCMP-NEXT: sw a2, 112(a5) +; RV64IZCMP-NEXT: sw a3, 108(a5) +; RV64IZCMP-NEXT: sw a6, 104(a5) +; RV64IZCMP-NEXT: sw s0, 100(a5) +; RV64IZCMP-NEXT: sw a7, 96(a5) +; RV64IZCMP-NEXT: sw t0, 92(a5) +; RV64IZCMP-NEXT: sw t1, 88(a5) +; RV64IZCMP-NEXT: sw t2, 84(a5) ; RV64IZCMP-NEXT: sw s1, 80(a5) ; RV64IZCMP-NEXT: sw ra, 76(a5) ; RV64IZCMP-NEXT: sw s11, 72(a5) @@ -1124,13 +1124,13 @@ define void @callee() { ; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: sw a0, 16(a5) ; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var+12)(t0) +; RV64IZCMP-NEXT: sw a0, %lo(var+12)(a4) ; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var+8)(t0) +; RV64IZCMP-NEXT: sw a0, %lo(var+8)(a4) ; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var+4)(t0) +; RV64IZCMP-NEXT: sw a0, %lo(var+4)(a4) ; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var)(t0) +; RV64IZCMP-NEXT: sw a0, %lo(var)(a4) ; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 ; ; RV64IZCMP-WITH-FP-LABEL: callee: @@ -1165,16 +1165,16 @@ define void @callee() { ; RV64IZCMP-WITH-FP-NEXT: .cfi_offset s11, -104 ; RV64IZCMP-WITH-FP-NEXT: addi s0, sp, 160 ; RV64IZCMP-WITH-FP-NEXT: .cfi_def_cfa s0, 0 -; RV64IZCMP-WITH-FP-NEXT: lui t1, %hi(var) -; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(t1) +; RV64IZCMP-WITH-FP-NEXT: lui a4, %hi(var) +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var)(a4) ; RV64IZCMP-WITH-FP-NEXT: sd a0, -112(s0) # 8-byte Folded Spill -; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(t1) +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+4)(a4) ; RV64IZCMP-WITH-FP-NEXT: sd a0, -120(s0) # 8-byte Folded Spill -; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(t1) +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+8)(a4) ; RV64IZCMP-WITH-FP-NEXT: sd a0, -128(s0) # 8-byte Folded Spill -; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(t1) +; RV64IZCMP-WITH-FP-NEXT: lw a0, %lo(var+12)(a4) ; RV64IZCMP-WITH-FP-NEXT: sd a0, -136(s0) # 8-byte Folded Spill -; RV64IZCMP-WITH-FP-NEXT: addi a5, t1, %lo(var) +; RV64IZCMP-WITH-FP-NEXT: addi a5, a4, %lo(var) ; RV64IZCMP-WITH-FP-NEXT: lw a0, 16(a5) ; RV64IZCMP-WITH-FP-NEXT: sd a0, -144(s0) # 8-byte Folded Spill ; RV64IZCMP-WITH-FP-NEXT: lw a0, 20(a5) @@ -1194,30 +1194,30 @@ define void @callee() { ; RV64IZCMP-WITH-FP-NEXT: lw s10, 68(a5) ; RV64IZCMP-WITH-FP-NEXT: lw s11, 72(a5) ; RV64IZCMP-WITH-FP-NEXT: lw ra, 76(a5) -; RV64IZCMP-WITH-FP-NEXT: lw t4, 80(a5) -; RV64IZCMP-WITH-FP-NEXT: lw t3, 84(a5) -; RV64IZCMP-WITH-FP-NEXT: lw t2, 88(a5) +; RV64IZCMP-WITH-FP-NEXT: lw t3, 80(a5) +; RV64IZCMP-WITH-FP-NEXT: lw t2, 84(a5) +; RV64IZCMP-WITH-FP-NEXT: lw t1, 88(a5) ; RV64IZCMP-WITH-FP-NEXT: lw 
s1, 92(a5) -; RV64IZCMP-WITH-FP-NEXT: lw t0, 112(a5) -; RV64IZCMP-WITH-FP-NEXT: lw a4, 116(a5) -; RV64IZCMP-WITH-FP-NEXT: lw a3, 120(a5) -; RV64IZCMP-WITH-FP-NEXT: lw a0, 124(a5) -; RV64IZCMP-WITH-FP-NEXT: lw a7, 96(a5) -; RV64IZCMP-WITH-FP-NEXT: lw a6, 100(a5) -; RV64IZCMP-WITH-FP-NEXT: lw a2, 104(a5) -; RV64IZCMP-WITH-FP-NEXT: lw a1, 108(a5) -; RV64IZCMP-WITH-FP-NEXT: sw a0, 124(a5) -; RV64IZCMP-WITH-FP-NEXT: sw a3, 120(a5) -; RV64IZCMP-WITH-FP-NEXT: sw a4, 116(a5) -; RV64IZCMP-WITH-FP-NEXT: sw t0, 112(a5) -; RV64IZCMP-WITH-FP-NEXT: sw a1, 108(a5) -; RV64IZCMP-WITH-FP-NEXT: sw a2, 104(a5) -; RV64IZCMP-WITH-FP-NEXT: sw a6, 100(a5) -; RV64IZCMP-WITH-FP-NEXT: sw a7, 96(a5) +; RV64IZCMP-WITH-FP-NEXT: lw t0, 96(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a7, 100(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a6, 104(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a3, 108(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a2, 112(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a1, 116(a5) +; RV64IZCMP-WITH-FP-NEXT: lw a0, 120(a5) +; RV64IZCMP-WITH-FP-NEXT: lw t4, 124(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t4, 124(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a0, 120(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a1, 116(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a2, 112(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a3, 108(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a6, 104(a5) +; RV64IZCMP-WITH-FP-NEXT: sw a7, 100(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t0, 96(a5) ; RV64IZCMP-WITH-FP-NEXT: sw s1, 92(a5) -; RV64IZCMP-WITH-FP-NEXT: sw t2, 88(a5) -; RV64IZCMP-WITH-FP-NEXT: sw t3, 84(a5) -; RV64IZCMP-WITH-FP-NEXT: sw t4, 80(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t1, 88(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t2, 84(a5) +; RV64IZCMP-WITH-FP-NEXT: sw t3, 80(a5) ; RV64IZCMP-WITH-FP-NEXT: sw ra, 76(a5) ; RV64IZCMP-WITH-FP-NEXT: sw s11, 72(a5) ; RV64IZCMP-WITH-FP-NEXT: sw s10, 68(a5) @@ -1238,13 +1238,13 @@ define void @callee() { ; RV64IZCMP-WITH-FP-NEXT: ld a0, -144(s0) # 8-byte Folded Reload ; RV64IZCMP-WITH-FP-NEXT: sw a0, 16(a5) ; RV64IZCMP-WITH-FP-NEXT: ld a0, -136(s0) # 8-byte Folded Reload -; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(t1) +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+12)(a4) ; RV64IZCMP-WITH-FP-NEXT: ld a0, -128(s0) # 8-byte Folded Reload -; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(t1) +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+8)(a4) ; RV64IZCMP-WITH-FP-NEXT: ld a0, -120(s0) # 8-byte Folded Reload -; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(t1) +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var+4)(a4) ; RV64IZCMP-WITH-FP-NEXT: ld a0, -112(s0) # 8-byte Folded Reload -; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(t1) +; RV64IZCMP-WITH-FP-NEXT: sw a0, %lo(var)(a4) ; RV64IZCMP-WITH-FP-NEXT: .cfi_def_cfa sp, 160 ; RV64IZCMP-WITH-FP-NEXT: ld ra, 152(sp) # 8-byte Folded Reload ; RV64IZCMP-WITH-FP-NEXT: ld s0, 144(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-half.ll b/llvm/test/CodeGen/RISCV/calling-conv-half.ll index 541c9b4d40c7e..aa08c3f5c95b1 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-half.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-half.ll @@ -225,8 +225,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu a0, 16(sp) ; RV32I-NEXT: mv s0, a7 +; RV32I-NEXT: lhu a0, 16(sp) ; RV32I-NEXT: call __extendhfsf2 ; RV32I-NEXT: call __fixsfsi ; RV32I-NEXT: add a0, s0, a0 @@ -240,8 +240,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, ; RV64I-NEXT: addi sp, sp, -16 ; RV64I-NEXT: sd ra, 8(sp) # 
8-byte Folded Spill ; RV64I-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu a0, 16(sp) ; RV64I-NEXT: mv s0, a7 +; RV64I-NEXT: lhu a0, 16(sp) ; RV64I-NEXT: call __extendhfsf2 ; RV64I-NEXT: call __fixsfdi ; RV64I-NEXT: addw a0, s0, a0 @@ -255,8 +255,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, ; RV32IF-NEXT: addi sp, sp, -16 ; RV32IF-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IF-NEXT: sw s0, 8(sp) # 4-byte Folded Spill -; RV32IF-NEXT: lhu a0, 16(sp) ; RV32IF-NEXT: mv s0, a7 +; RV32IF-NEXT: lhu a0, 16(sp) ; RV32IF-NEXT: call __extendhfsf2 ; RV32IF-NEXT: fmv.w.x fa5, a0 ; RV32IF-NEXT: fcvt.w.s a0, fa5, rtz @@ -271,8 +271,8 @@ define i32 @callee_half_on_stack(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, ; RV64IF-NEXT: addi sp, sp, -16 ; RV64IF-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64IF-NEXT: sd s0, 0(sp) # 8-byte Folded Spill -; RV64IF-NEXT: lhu a0, 16(sp) ; RV64IF-NEXT: mv s0, a7 +; RV64IF-NEXT: lhu a0, 16(sp) ; RV64IF-NEXT: call __extendhfsf2 ; RV64IF-NEXT: fmv.w.x fa5, a0 ; RV64IF-NEXT: fcvt.l.s a0, fa5, rtz @@ -341,9 +341,9 @@ define i32 @caller_half_on_stack() nounwind { ; RV32I-NEXT: li a4, 5 ; RV32I-NEXT: li a5, 6 ; RV32I-NEXT: li a6, 7 -; RV32I-NEXT: addi t0, a7, -1792 +; RV32I-NEXT: addi a7, a7, -1792 +; RV32I-NEXT: sw a7, 0(sp) ; RV32I-NEXT: li a7, 8 -; RV32I-NEXT: sw t0, 0(sp) ; RV32I-NEXT: call callee_half_on_stack ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 @@ -361,9 +361,9 @@ define i32 @caller_half_on_stack() nounwind { ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: li a5, 6 ; RV64I-NEXT: li a6, 7 -; RV64I-NEXT: addiw t0, a7, -1792 +; RV64I-NEXT: addiw a7, a7, -1792 +; RV64I-NEXT: sd a7, 0(sp) ; RV64I-NEXT: li a7, 8 -; RV64I-NEXT: sd t0, 0(sp) ; RV64I-NEXT: call callee_half_on_stack ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 @@ -381,9 +381,9 @@ define i32 @caller_half_on_stack() nounwind { ; RV32IF-NEXT: li a4, 5 ; RV32IF-NEXT: li a5, 6 ; RV32IF-NEXT: li a6, 7 -; RV32IF-NEXT: addi t0, a7, -1792 +; RV32IF-NEXT: addi a7, a7, -1792 +; RV32IF-NEXT: sw a7, 0(sp) ; RV32IF-NEXT: li a7, 8 -; RV32IF-NEXT: sw t0, 0(sp) ; RV32IF-NEXT: call callee_half_on_stack ; RV32IF-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IF-NEXT: addi sp, sp, 16 @@ -401,9 +401,9 @@ define i32 @caller_half_on_stack() nounwind { ; RV64IF-NEXT: li a4, 5 ; RV64IF-NEXT: li a5, 6 ; RV64IF-NEXT: li a6, 7 -; RV64IF-NEXT: addi t0, a7, -1792 +; RV64IF-NEXT: addi a7, a7, -1792 +; RV64IF-NEXT: sw a7, 0(sp) ; RV64IF-NEXT: li a7, 8 -; RV64IF-NEXT: sw t0, 0(sp) ; RV64IF-NEXT: call callee_half_on_stack ; RV64IF-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IF-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll index 9387b7ef4c32e..6697cd0e503e7 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-common.ll @@ -94,15 +94,15 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; RV32I-FPELIM-LABEL: callee_aligned_stack: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: lw a0, 0(a2) -; RV32I-FPELIM-NEXT: lw a1, 8(sp) -; RV32I-FPELIM-NEXT: lw a2, 0(sp) -; RV32I-FPELIM-NEXT: lw a3, 20(sp) +; RV32I-FPELIM-NEXT: lw a1, 20(sp) +; RV32I-FPELIM-NEXT: lw a2, 8(sp) +; RV32I-FPELIM-NEXT: lw a3, 0(sp) ; RV32I-FPELIM-NEXT: lw a4, 16(sp) ; RV32I-FPELIM-NEXT: add a0, a0, a7 -; RV32I-FPELIM-NEXT: add a1, a2, 
a1 +; RV32I-FPELIM-NEXT: add a2, a3, a2 +; RV32I-FPELIM-NEXT: add a0, a0, a2 +; RV32I-FPELIM-NEXT: add a1, a4, a1 ; RV32I-FPELIM-NEXT: add a0, a0, a1 -; RV32I-FPELIM-NEXT: add a3, a4, a3 -; RV32I-FPELIM-NEXT: add a0, a0, a3 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: callee_aligned_stack: @@ -112,15 +112,15 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: lw a0, 0(a2) -; RV32I-WITHFP-NEXT: lw a1, 8(s0) -; RV32I-WITHFP-NEXT: lw a2, 0(s0) -; RV32I-WITHFP-NEXT: lw a3, 20(s0) +; RV32I-WITHFP-NEXT: lw a1, 20(s0) +; RV32I-WITHFP-NEXT: lw a2, 8(s0) +; RV32I-WITHFP-NEXT: lw a3, 0(s0) ; RV32I-WITHFP-NEXT: lw a4, 16(s0) ; RV32I-WITHFP-NEXT: add a0, a0, a7 -; RV32I-WITHFP-NEXT: add a1, a2, a1 +; RV32I-WITHFP-NEXT: add a2, a3, a2 +; RV32I-WITHFP-NEXT: add a0, a0, a2 +; RV32I-WITHFP-NEXT: add a1, a4, a1 ; RV32I-WITHFP-NEXT: add a0, a0, a1 -; RV32I-WITHFP-NEXT: add a3, a4, a3 -; RV32I-WITHFP-NEXT: add a0, a0, a3 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: addi sp, sp, 16 @@ -145,45 +145,43 @@ define void @caller_aligned_stack() nounwind { ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: addi sp, sp, -64 ; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: li a5, 18 -; RV32I-FPELIM-NEXT: li a6, 17 -; RV32I-FPELIM-NEXT: li a7, 16 -; RV32I-FPELIM-NEXT: lui t0, 262236 -; RV32I-FPELIM-NEXT: lui t1, 377487 -; RV32I-FPELIM-NEXT: li t2, 15 -; RV32I-FPELIM-NEXT: lui t3, 262153 -; RV32I-FPELIM-NEXT: lui t4, 545260 -; RV32I-FPELIM-NEXT: lui t5, 964690 -; RV32I-FPELIM-NEXT: lui t6, 335544 -; RV32I-FPELIM-NEXT: lui s0, 688509 +; RV32I-FPELIM-NEXT: li a4, 18 +; RV32I-FPELIM-NEXT: li a5, 17 +; RV32I-FPELIM-NEXT: li a6, 16 +; RV32I-FPELIM-NEXT: lui a7, 262236 +; RV32I-FPELIM-NEXT: lui t0, 377487 +; RV32I-FPELIM-NEXT: li t1, 15 +; RV32I-FPELIM-NEXT: lui t2, 262153 +; RV32I-FPELIM-NEXT: lui t3, 545260 +; RV32I-FPELIM-NEXT: lui t4, 964690 +; RV32I-FPELIM-NEXT: lui t5, 335544 +; RV32I-FPELIM-NEXT: lui t6, 688509 ; RV32I-FPELIM-NEXT: li a0, 1 ; RV32I-FPELIM-NEXT: li a1, 11 ; RV32I-FPELIM-NEXT: addi a2, sp, 32 ; RV32I-FPELIM-NEXT: li a3, 12 +; RV32I-FPELIM-NEXT: sw a5, 20(sp) +; RV32I-FPELIM-NEXT: sw a4, 24(sp) ; RV32I-FPELIM-NEXT: li a4, 13 -; RV32I-FPELIM-NEXT: sw a6, 20(sp) -; RV32I-FPELIM-NEXT: sw a5, 24(sp) -; RV32I-FPELIM-NEXT: li a6, 4 -; RV32I-FPELIM-NEXT: addi a5, t0, 655 -; RV32I-FPELIM-NEXT: addi t0, t1, 1475 -; RV32I-FPELIM-NEXT: sw t2, 0(sp) -; RV32I-FPELIM-NEXT: sw t0, 8(sp) +; RV32I-FPELIM-NEXT: addi a5, a7, 655 +; RV32I-FPELIM-NEXT: addi a7, t0, 1475 +; RV32I-FPELIM-NEXT: sw t1, 0(sp) +; RV32I-FPELIM-NEXT: sw a7, 8(sp) ; RV32I-FPELIM-NEXT: sw a5, 12(sp) -; RV32I-FPELIM-NEXT: sw a7, 16(sp) +; RV32I-FPELIM-NEXT: sw a6, 16(sp) +; RV32I-FPELIM-NEXT: li a6, 4 +; RV32I-FPELIM-NEXT: addi a7, t2, 491 +; RV32I-FPELIM-NEXT: addi t0, t3, -1967 +; RV32I-FPELIM-NEXT: addi t1, t4, -328 +; RV32I-FPELIM-NEXT: addi t2, t5, 1311 +; RV32I-FPELIM-NEXT: addi a5, t6, -2048 +; RV32I-FPELIM-NEXT: sw t2, 32(sp) +; RV32I-FPELIM-NEXT: sw t1, 36(sp) +; RV32I-FPELIM-NEXT: sw t0, 40(sp) +; RV32I-FPELIM-NEXT: sw a7, 44(sp) ; RV32I-FPELIM-NEXT: li a7, 14 -; RV32I-FPELIM-NEXT: addi t0, t3, 491 -; RV32I-FPELIM-NEXT: addi t1, t4, -1967 -; RV32I-FPELIM-NEXT: addi t2, t5, -328 -; RV32I-FPELIM-NEXT: addi t3, t6, 1311 -; 
RV32I-FPELIM-NEXT: addi a5, s0, -2048 -; RV32I-FPELIM-NEXT: sw t3, 32(sp) -; RV32I-FPELIM-NEXT: sw t2, 36(sp) -; RV32I-FPELIM-NEXT: sw t1, 40(sp) -; RV32I-FPELIM-NEXT: sw t0, 44(sp) ; RV32I-FPELIM-NEXT: call callee_aligned_stack ; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload -; RV32I-FPELIM-NEXT: lw s0, 56(sp) # 4-byte Folded Reload ; RV32I-FPELIM-NEXT: addi sp, sp, 64 ; RV32I-FPELIM-NEXT: ret ; @@ -192,47 +190,45 @@ define void @caller_aligned_stack() nounwind { ; RV32I-WITHFP-NEXT: addi sp, sp, -64 ; RV32I-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill -; RV32I-WITHFP-NEXT: sw s1, 52(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 64 -; RV32I-WITHFP-NEXT: li a5, 18 -; RV32I-WITHFP-NEXT: li a6, 17 -; RV32I-WITHFP-NEXT: li a7, 16 -; RV32I-WITHFP-NEXT: lui t0, 262236 -; RV32I-WITHFP-NEXT: lui t1, 377487 -; RV32I-WITHFP-NEXT: li t2, 15 -; RV32I-WITHFP-NEXT: lui t3, 262153 -; RV32I-WITHFP-NEXT: lui t4, 545260 -; RV32I-WITHFP-NEXT: lui t5, 964690 -; RV32I-WITHFP-NEXT: lui t6, 335544 -; RV32I-WITHFP-NEXT: lui s1, 688509 +; RV32I-WITHFP-NEXT: li a4, 18 +; RV32I-WITHFP-NEXT: li a5, 17 +; RV32I-WITHFP-NEXT: li a6, 16 +; RV32I-WITHFP-NEXT: lui a7, 262236 +; RV32I-WITHFP-NEXT: lui t0, 377487 +; RV32I-WITHFP-NEXT: li t1, 15 +; RV32I-WITHFP-NEXT: lui t2, 262153 +; RV32I-WITHFP-NEXT: lui t3, 545260 +; RV32I-WITHFP-NEXT: lui t4, 964690 +; RV32I-WITHFP-NEXT: lui t5, 335544 +; RV32I-WITHFP-NEXT: lui t6, 688509 ; RV32I-WITHFP-NEXT: li a0, 1 ; RV32I-WITHFP-NEXT: li a1, 11 ; RV32I-WITHFP-NEXT: addi a2, s0, -32 ; RV32I-WITHFP-NEXT: li a3, 12 +; RV32I-WITHFP-NEXT: sw a5, 20(sp) +; RV32I-WITHFP-NEXT: sw a4, 24(sp) ; RV32I-WITHFP-NEXT: li a4, 13 -; RV32I-WITHFP-NEXT: sw a6, 20(sp) -; RV32I-WITHFP-NEXT: sw a5, 24(sp) -; RV32I-WITHFP-NEXT: li a6, 4 -; RV32I-WITHFP-NEXT: addi a5, t0, 655 -; RV32I-WITHFP-NEXT: addi t0, t1, 1475 -; RV32I-WITHFP-NEXT: sw t2, 0(sp) -; RV32I-WITHFP-NEXT: sw t0, 8(sp) +; RV32I-WITHFP-NEXT: addi a5, a7, 655 +; RV32I-WITHFP-NEXT: addi a7, t0, 1475 +; RV32I-WITHFP-NEXT: sw t1, 0(sp) +; RV32I-WITHFP-NEXT: sw a7, 8(sp) ; RV32I-WITHFP-NEXT: sw a5, 12(sp) -; RV32I-WITHFP-NEXT: sw a7, 16(sp) +; RV32I-WITHFP-NEXT: sw a6, 16(sp) +; RV32I-WITHFP-NEXT: li a6, 4 +; RV32I-WITHFP-NEXT: addi a7, t2, 491 +; RV32I-WITHFP-NEXT: addi t0, t3, -1967 +; RV32I-WITHFP-NEXT: addi t1, t4, -328 +; RV32I-WITHFP-NEXT: addi t2, t5, 1311 +; RV32I-WITHFP-NEXT: addi a5, t6, -2048 +; RV32I-WITHFP-NEXT: sw t2, -32(s0) +; RV32I-WITHFP-NEXT: sw t1, -28(s0) +; RV32I-WITHFP-NEXT: sw t0, -24(s0) +; RV32I-WITHFP-NEXT: sw a7, -20(s0) ; RV32I-WITHFP-NEXT: li a7, 14 -; RV32I-WITHFP-NEXT: addi t0, t3, 491 -; RV32I-WITHFP-NEXT: addi t1, t4, -1967 -; RV32I-WITHFP-NEXT: addi t2, t5, -328 -; RV32I-WITHFP-NEXT: addi t3, t6, 1311 -; RV32I-WITHFP-NEXT: addi a5, s1, -2048 -; RV32I-WITHFP-NEXT: sw t3, -32(s0) -; RV32I-WITHFP-NEXT: sw t2, -28(s0) -; RV32I-WITHFP-NEXT: sw t1, -24(s0) -; RV32I-WITHFP-NEXT: sw t0, -20(s0) ; RV32I-WITHFP-NEXT: call callee_aligned_stack ; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload -; RV32I-WITHFP-NEXT: lw s1, 52(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: addi sp, sp, 64 ; RV32I-WITHFP-NEXT: ret %1 = call i32 @callee_aligned_stack(i32 1, i32 11, diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll index 18916dd69eb43..f54e86b497945 100644 --- 
a/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32-ilp32f-ilp32d-common.ll @@ -149,9 +149,9 @@ define i32 @caller_many_scalars() nounwind { ; RV32I-FPELIM-NEXT: li a3, 4 ; RV32I-FPELIM-NEXT: li a5, 5 ; RV32I-FPELIM-NEXT: li a6, 6 -; RV32I-FPELIM-NEXT: li a7, 7 ; RV32I-FPELIM-NEXT: sw zero, 0(sp) ; RV32I-FPELIM-NEXT: sw a4, 4(sp) +; RV32I-FPELIM-NEXT: li a7, 7 ; RV32I-FPELIM-NEXT: li a4, 0 ; RV32I-FPELIM-NEXT: call callee_many_scalars ; RV32I-FPELIM-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -171,9 +171,9 @@ define i32 @caller_many_scalars() nounwind { ; RV32I-WITHFP-NEXT: li a3, 4 ; RV32I-WITHFP-NEXT: li a5, 5 ; RV32I-WITHFP-NEXT: li a6, 6 -; RV32I-WITHFP-NEXT: li a7, 7 ; RV32I-WITHFP-NEXT: sw zero, 0(sp) ; RV32I-WITHFP-NEXT: sw a4, 4(sp) +; RV32I-WITHFP-NEXT: li a7, 7 ; RV32I-WITHFP-NEXT: li a4, 0 ; RV32I-WITHFP-NEXT: call callee_many_scalars ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -194,17 +194,17 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { ; RV32I-FPELIM-NEXT: lw a3, 4(a1) ; RV32I-FPELIM-NEXT: lw a4, 8(a1) ; RV32I-FPELIM-NEXT: lw a1, 12(a1) -; RV32I-FPELIM-NEXT: lw a5, 12(a0) +; RV32I-FPELIM-NEXT: lw a5, 0(a0) ; RV32I-FPELIM-NEXT: lw a6, 4(a0) ; RV32I-FPELIM-NEXT: lw a7, 8(a0) -; RV32I-FPELIM-NEXT: lw a0, 0(a0) -; RV32I-FPELIM-NEXT: xor a1, a5, a1 -; RV32I-FPELIM-NEXT: xor a3, a6, a3 -; RV32I-FPELIM-NEXT: xor a4, a7, a4 -; RV32I-FPELIM-NEXT: xor a0, a0, a2 -; RV32I-FPELIM-NEXT: or a1, a3, a1 -; RV32I-FPELIM-NEXT: or a0, a0, a4 -; RV32I-FPELIM-NEXT: or a0, a0, a1 +; RV32I-FPELIM-NEXT: lw a0, 12(a0) +; RV32I-FPELIM-NEXT: xor a0, a0, a1 +; RV32I-FPELIM-NEXT: xor a1, a6, a3 +; RV32I-FPELIM-NEXT: xor a3, a7, a4 +; RV32I-FPELIM-NEXT: xor a2, a5, a2 +; RV32I-FPELIM-NEXT: or a0, a1, a0 +; RV32I-FPELIM-NEXT: or a2, a2, a3 +; RV32I-FPELIM-NEXT: or a0, a2, a0 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: ret ; @@ -218,17 +218,17 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) nounwind { ; RV32I-WITHFP-NEXT: lw a3, 4(a1) ; RV32I-WITHFP-NEXT: lw a4, 8(a1) ; RV32I-WITHFP-NEXT: lw a1, 12(a1) -; RV32I-WITHFP-NEXT: lw a5, 12(a0) +; RV32I-WITHFP-NEXT: lw a5, 0(a0) ; RV32I-WITHFP-NEXT: lw a6, 4(a0) ; RV32I-WITHFP-NEXT: lw a7, 8(a0) -; RV32I-WITHFP-NEXT: lw a0, 0(a0) -; RV32I-WITHFP-NEXT: xor a1, a5, a1 -; RV32I-WITHFP-NEXT: xor a3, a6, a3 -; RV32I-WITHFP-NEXT: xor a4, a7, a4 -; RV32I-WITHFP-NEXT: xor a0, a0, a2 -; RV32I-WITHFP-NEXT: or a1, a3, a1 -; RV32I-WITHFP-NEXT: or a0, a0, a4 -; RV32I-WITHFP-NEXT: or a0, a0, a1 +; RV32I-WITHFP-NEXT: lw a0, 12(a0) +; RV32I-WITHFP-NEXT: xor a0, a0, a1 +; RV32I-WITHFP-NEXT: xor a1, a6, a3 +; RV32I-WITHFP-NEXT: xor a3, a7, a4 +; RV32I-WITHFP-NEXT: xor a2, a5, a2 +; RV32I-WITHFP-NEXT: or a0, a1, a0 +; RV32I-WITHFP-NEXT: or a2, a2, a3 +; RV32I-WITHFP-NEXT: or a0, a2, a0 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -245,18 +245,18 @@ define i32 @caller_large_scalars() nounwind { ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: addi sp, sp, -48 ; RV32I-FPELIM-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: lui a1, 524272 -; RV32I-FPELIM-NEXT: li a2, 1 -; RV32I-FPELIM-NEXT: addi a0, sp, 24 +; RV32I-FPELIM-NEXT: lui a0, 524272 +; RV32I-FPELIM-NEXT: li a1, 1 ; RV32I-FPELIM-NEXT: sw zero, 0(sp) ; RV32I-FPELIM-NEXT: sw zero, 4(sp) ; RV32I-FPELIM-NEXT: sw zero, 8(sp) -; RV32I-FPELIM-NEXT: sw a1, 12(sp) -; RV32I-FPELIM-NEXT: mv a1, sp -; 
RV32I-FPELIM-NEXT: sw a2, 24(sp) +; RV32I-FPELIM-NEXT: sw a0, 12(sp) +; RV32I-FPELIM-NEXT: addi a0, sp, 24 +; RV32I-FPELIM-NEXT: sw a1, 24(sp) ; RV32I-FPELIM-NEXT: sw zero, 28(sp) ; RV32I-FPELIM-NEXT: sw zero, 32(sp) ; RV32I-FPELIM-NEXT: sw zero, 36(sp) +; RV32I-FPELIM-NEXT: mv a1, sp ; RV32I-FPELIM-NEXT: call callee_large_scalars ; RV32I-FPELIM-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-FPELIM-NEXT: addi sp, sp, 48 @@ -268,18 +268,18 @@ define i32 @caller_large_scalars() nounwind { ; RV32I-WITHFP-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 48 -; RV32I-WITHFP-NEXT: lui a1, 524272 -; RV32I-WITHFP-NEXT: li a2, 1 -; RV32I-WITHFP-NEXT: addi a0, s0, -24 +; RV32I-WITHFP-NEXT: lui a0, 524272 +; RV32I-WITHFP-NEXT: li a1, 1 ; RV32I-WITHFP-NEXT: sw zero, -48(s0) ; RV32I-WITHFP-NEXT: sw zero, -44(s0) ; RV32I-WITHFP-NEXT: sw zero, -40(s0) -; RV32I-WITHFP-NEXT: sw a1, -36(s0) -; RV32I-WITHFP-NEXT: addi a1, s0, -48 -; RV32I-WITHFP-NEXT: sw a2, -24(s0) +; RV32I-WITHFP-NEXT: sw a0, -36(s0) +; RV32I-WITHFP-NEXT: addi a0, s0, -24 +; RV32I-WITHFP-NEXT: sw a1, -24(s0) ; RV32I-WITHFP-NEXT: sw zero, -20(s0) ; RV32I-WITHFP-NEXT: sw zero, -16(s0) ; RV32I-WITHFP-NEXT: sw zero, -12(s0) +; RV32I-WITHFP-NEXT: addi a1, s0, -48 ; RV32I-WITHFP-NEXT: call callee_large_scalars ; RV32I-WITHFP-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 40(sp) # 4-byte Folded Reload @@ -301,17 +301,17 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; RV32I-FPELIM-NEXT: lw a2, 4(a7) ; RV32I-FPELIM-NEXT: lw a3, 8(a7) ; RV32I-FPELIM-NEXT: lw a4, 12(a7) -; RV32I-FPELIM-NEXT: lw a5, 12(a0) +; RV32I-FPELIM-NEXT: lw a5, 0(a0) ; RV32I-FPELIM-NEXT: lw a6, 4(a0) ; RV32I-FPELIM-NEXT: lw a7, 8(a0) -; RV32I-FPELIM-NEXT: lw a0, 0(a0) -; RV32I-FPELIM-NEXT: xor a4, a4, a5 +; RV32I-FPELIM-NEXT: lw a0, 12(a0) +; RV32I-FPELIM-NEXT: xor a0, a4, a0 ; RV32I-FPELIM-NEXT: xor a2, a2, a6 ; RV32I-FPELIM-NEXT: xor a3, a3, a7 -; RV32I-FPELIM-NEXT: xor a0, a1, a0 -; RV32I-FPELIM-NEXT: or a2, a2, a4 -; RV32I-FPELIM-NEXT: or a0, a0, a3 -; RV32I-FPELIM-NEXT: or a0, a0, a2 +; RV32I-FPELIM-NEXT: xor a1, a1, a5 +; RV32I-FPELIM-NEXT: or a0, a2, a0 +; RV32I-FPELIM-NEXT: or a1, a1, a3 +; RV32I-FPELIM-NEXT: or a0, a1, a0 ; RV32I-FPELIM-NEXT: seqz a0, a0 ; RV32I-FPELIM-NEXT: ret ; @@ -326,17 +326,17 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; RV32I-WITHFP-NEXT: lw a2, 4(a7) ; RV32I-WITHFP-NEXT: lw a3, 8(a7) ; RV32I-WITHFP-NEXT: lw a4, 12(a7) -; RV32I-WITHFP-NEXT: lw a5, 12(a0) +; RV32I-WITHFP-NEXT: lw a5, 0(a0) ; RV32I-WITHFP-NEXT: lw a6, 4(a0) ; RV32I-WITHFP-NEXT: lw a7, 8(a0) -; RV32I-WITHFP-NEXT: lw a0, 0(a0) -; RV32I-WITHFP-NEXT: xor a4, a4, a5 +; RV32I-WITHFP-NEXT: lw a0, 12(a0) +; RV32I-WITHFP-NEXT: xor a0, a4, a0 ; RV32I-WITHFP-NEXT: xor a2, a2, a6 ; RV32I-WITHFP-NEXT: xor a3, a3, a7 -; RV32I-WITHFP-NEXT: xor a0, a1, a0 -; RV32I-WITHFP-NEXT: or a2, a2, a4 -; RV32I-WITHFP-NEXT: or a0, a0, a3 -; RV32I-WITHFP-NEXT: or a0, a0, a2 +; RV32I-WITHFP-NEXT: xor a1, a1, a5 +; RV32I-WITHFP-NEXT: or a0, a2, a0 +; RV32I-WITHFP-NEXT: or a1, a1, a3 +; RV32I-WITHFP-NEXT: or a0, a1, a0 ; RV32I-WITHFP-NEXT: seqz a0, a0 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload @@ -353,28 +353,28 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind { ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: addi sp, sp, -64 ; RV32I-FPELIM-NEXT: 
sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: addi a6, sp, 16 -; RV32I-FPELIM-NEXT: li a7, 9 -; RV32I-FPELIM-NEXT: lui t0, 524272 -; RV32I-FPELIM-NEXT: li t1, 8 +; RV32I-FPELIM-NEXT: addi a5, sp, 16 +; RV32I-FPELIM-NEXT: li a6, 9 +; RV32I-FPELIM-NEXT: lui a7, 524272 +; RV32I-FPELIM-NEXT: li t0, 8 ; RV32I-FPELIM-NEXT: li a0, 1 ; RV32I-FPELIM-NEXT: li a1, 2 ; RV32I-FPELIM-NEXT: li a2, 3 ; RV32I-FPELIM-NEXT: li a3, 4 ; RV32I-FPELIM-NEXT: li a4, 5 +; RV32I-FPELIM-NEXT: sw a6, 0(sp) +; RV32I-FPELIM-NEXT: sw a5, 4(sp) ; RV32I-FPELIM-NEXT: li a5, 6 -; RV32I-FPELIM-NEXT: sw a7, 0(sp) -; RV32I-FPELIM-NEXT: sw a6, 4(sp) -; RV32I-FPELIM-NEXT: li a6, 7 ; RV32I-FPELIM-NEXT: sw zero, 16(sp) ; RV32I-FPELIM-NEXT: sw zero, 20(sp) ; RV32I-FPELIM-NEXT: sw zero, 24(sp) -; RV32I-FPELIM-NEXT: sw t0, 28(sp) -; RV32I-FPELIM-NEXT: addi a7, sp, 40 -; RV32I-FPELIM-NEXT: sw t1, 40(sp) +; RV32I-FPELIM-NEXT: sw a7, 28(sp) +; RV32I-FPELIM-NEXT: li a6, 7 +; RV32I-FPELIM-NEXT: sw t0, 40(sp) ; RV32I-FPELIM-NEXT: sw zero, 44(sp) ; RV32I-FPELIM-NEXT: sw zero, 48(sp) ; RV32I-FPELIM-NEXT: sw zero, 52(sp) +; RV32I-FPELIM-NEXT: addi a7, sp, 40 ; RV32I-FPELIM-NEXT: call callee_large_scalars_exhausted_regs ; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-FPELIM-NEXT: addi sp, sp, 64 @@ -386,28 +386,28 @@ define i32 @caller_large_scalars_exhausted_regs() nounwind { ; RV32I-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 64 -; RV32I-WITHFP-NEXT: addi a6, s0, -48 -; RV32I-WITHFP-NEXT: li a7, 9 -; RV32I-WITHFP-NEXT: lui t0, 524272 -; RV32I-WITHFP-NEXT: li t1, 8 +; RV32I-WITHFP-NEXT: addi a5, s0, -48 +; RV32I-WITHFP-NEXT: li a6, 9 +; RV32I-WITHFP-NEXT: lui a7, 524272 +; RV32I-WITHFP-NEXT: li t0, 8 ; RV32I-WITHFP-NEXT: li a0, 1 ; RV32I-WITHFP-NEXT: li a1, 2 ; RV32I-WITHFP-NEXT: li a2, 3 ; RV32I-WITHFP-NEXT: li a3, 4 ; RV32I-WITHFP-NEXT: li a4, 5 +; RV32I-WITHFP-NEXT: sw a6, 0(sp) +; RV32I-WITHFP-NEXT: sw a5, 4(sp) ; RV32I-WITHFP-NEXT: li a5, 6 -; RV32I-WITHFP-NEXT: sw a7, 0(sp) -; RV32I-WITHFP-NEXT: sw a6, 4(sp) -; RV32I-WITHFP-NEXT: li a6, 7 ; RV32I-WITHFP-NEXT: sw zero, -48(s0) ; RV32I-WITHFP-NEXT: sw zero, -44(s0) ; RV32I-WITHFP-NEXT: sw zero, -40(s0) -; RV32I-WITHFP-NEXT: sw t0, -36(s0) -; RV32I-WITHFP-NEXT: addi a7, s0, -24 -; RV32I-WITHFP-NEXT: sw t1, -24(s0) +; RV32I-WITHFP-NEXT: sw a7, -36(s0) +; RV32I-WITHFP-NEXT: li a6, 7 +; RV32I-WITHFP-NEXT: sw t0, -24(s0) ; RV32I-WITHFP-NEXT: sw zero, -20(s0) ; RV32I-WITHFP-NEXT: sw zero, -16(s0) ; RV32I-WITHFP-NEXT: sw zero, -12(s0) +; RV32I-WITHFP-NEXT: addi a7, s0, -24 ; RV32I-WITHFP-NEXT: call callee_large_scalars_exhausted_regs ; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload @@ -614,15 +614,15 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; RV32I-FPELIM-LABEL: callee_aligned_stack: ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: lw a0, 0(a2) -; RV32I-FPELIM-NEXT: lw a1, 8(sp) -; RV32I-FPELIM-NEXT: lw a2, 0(sp) -; RV32I-FPELIM-NEXT: lw a3, 20(sp) +; RV32I-FPELIM-NEXT: lw a1, 20(sp) +; RV32I-FPELIM-NEXT: lw a2, 8(sp) +; RV32I-FPELIM-NEXT: lw a3, 0(sp) ; RV32I-FPELIM-NEXT: lw a4, 16(sp) ; RV32I-FPELIM-NEXT: add a0, a0, a7 -; RV32I-FPELIM-NEXT: add a1, a2, a1 +; RV32I-FPELIM-NEXT: add a2, a3, a2 +; RV32I-FPELIM-NEXT: add a0, a0, a2 +; RV32I-FPELIM-NEXT: add a1, a4, a1 ; RV32I-FPELIM-NEXT: add a0, a0, a1 -; RV32I-FPELIM-NEXT: add a3, a4, a3 -; RV32I-FPELIM-NEXT: add a0, 
a0, a3 ; RV32I-FPELIM-NEXT: ret ; ; RV32I-WITHFP-LABEL: callee_aligned_stack: @@ -632,15 +632,15 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; RV32I-WITHFP-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: addi s0, sp, 16 ; RV32I-WITHFP-NEXT: lw a0, 0(a2) -; RV32I-WITHFP-NEXT: lw a1, 8(s0) -; RV32I-WITHFP-NEXT: lw a2, 0(s0) -; RV32I-WITHFP-NEXT: lw a3, 20(s0) +; RV32I-WITHFP-NEXT: lw a1, 20(s0) +; RV32I-WITHFP-NEXT: lw a2, 8(s0) +; RV32I-WITHFP-NEXT: lw a3, 0(s0) ; RV32I-WITHFP-NEXT: lw a4, 16(s0) ; RV32I-WITHFP-NEXT: add a0, a0, a7 -; RV32I-WITHFP-NEXT: add a1, a2, a1 +; RV32I-WITHFP-NEXT: add a2, a3, a2 +; RV32I-WITHFP-NEXT: add a0, a0, a2 +; RV32I-WITHFP-NEXT: add a1, a4, a1 ; RV32I-WITHFP-NEXT: add a0, a0, a1 -; RV32I-WITHFP-NEXT: add a3, a4, a3 -; RV32I-WITHFP-NEXT: add a0, a0, a3 ; RV32I-WITHFP-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: addi sp, sp, 16 @@ -664,38 +664,38 @@ define void @caller_aligned_stack() nounwind { ; RV32I-FPELIM: # %bb.0: ; RV32I-FPELIM-NEXT: addi sp, sp, -64 ; RV32I-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32I-FPELIM-NEXT: li a5, 19 -; RV32I-FPELIM-NEXT: li a6, 18 -; RV32I-FPELIM-NEXT: li a7, 17 -; RV32I-FPELIM-NEXT: li t0, 16 -; RV32I-FPELIM-NEXT: li t1, 15 -; RV32I-FPELIM-NEXT: lui t2, 262153 -; RV32I-FPELIM-NEXT: lui t3, 545260 -; RV32I-FPELIM-NEXT: lui t4, 964690 -; RV32I-FPELIM-NEXT: lui t5, 335544 -; RV32I-FPELIM-NEXT: lui t6, 688509 +; RV32I-FPELIM-NEXT: li a4, 19 +; RV32I-FPELIM-NEXT: li a5, 18 +; RV32I-FPELIM-NEXT: li a6, 17 +; RV32I-FPELIM-NEXT: li a7, 16 +; RV32I-FPELIM-NEXT: li t0, 15 +; RV32I-FPELIM-NEXT: lui t1, 262153 +; RV32I-FPELIM-NEXT: lui t2, 545260 +; RV32I-FPELIM-NEXT: lui t3, 964690 +; RV32I-FPELIM-NEXT: lui t4, 335544 +; RV32I-FPELIM-NEXT: lui t5, 688509 ; RV32I-FPELIM-NEXT: li a0, 1 ; RV32I-FPELIM-NEXT: li a1, 11 ; RV32I-FPELIM-NEXT: addi a2, sp, 32 ; RV32I-FPELIM-NEXT: li a3, 12 +; RV32I-FPELIM-NEXT: sw a5, 20(sp) +; RV32I-FPELIM-NEXT: sw a4, 24(sp) ; RV32I-FPELIM-NEXT: li a4, 13 -; RV32I-FPELIM-NEXT: sw a6, 20(sp) -; RV32I-FPELIM-NEXT: sw a5, 24(sp) -; RV32I-FPELIM-NEXT: li a6, 4 -; RV32I-FPELIM-NEXT: sw t1, 0(sp) -; RV32I-FPELIM-NEXT: sw t0, 8(sp) +; RV32I-FPELIM-NEXT: sw t0, 0(sp) +; RV32I-FPELIM-NEXT: sw a7, 8(sp) ; RV32I-FPELIM-NEXT: sw zero, 12(sp) -; RV32I-FPELIM-NEXT: sw a7, 16(sp) +; RV32I-FPELIM-NEXT: sw a6, 16(sp) +; RV32I-FPELIM-NEXT: li a6, 4 +; RV32I-FPELIM-NEXT: addi a7, t1, 491 +; RV32I-FPELIM-NEXT: addi t0, t2, -1967 +; RV32I-FPELIM-NEXT: addi t1, t3, -328 +; RV32I-FPELIM-NEXT: addi t2, t4, 1311 +; RV32I-FPELIM-NEXT: addi a5, t5, -2048 +; RV32I-FPELIM-NEXT: sw t2, 32(sp) +; RV32I-FPELIM-NEXT: sw t1, 36(sp) +; RV32I-FPELIM-NEXT: sw t0, 40(sp) +; RV32I-FPELIM-NEXT: sw a7, 44(sp) ; RV32I-FPELIM-NEXT: li a7, 14 -; RV32I-FPELIM-NEXT: addi t0, t2, 491 -; RV32I-FPELIM-NEXT: addi t1, t3, -1967 -; RV32I-FPELIM-NEXT: addi t2, t4, -328 -; RV32I-FPELIM-NEXT: addi t3, t5, 1311 -; RV32I-FPELIM-NEXT: addi a5, t6, -2048 -; RV32I-FPELIM-NEXT: sw t3, 32(sp) -; RV32I-FPELIM-NEXT: sw t2, 36(sp) -; RV32I-FPELIM-NEXT: sw t1, 40(sp) -; RV32I-FPELIM-NEXT: sw t0, 44(sp) ; RV32I-FPELIM-NEXT: call callee_aligned_stack ; RV32I-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-FPELIM-NEXT: addi sp, sp, 64 @@ -707,38 +707,38 @@ define void @caller_aligned_stack() nounwind { ; RV32I-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill ; RV32I-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; 
RV32I-WITHFP-NEXT: addi s0, sp, 64 -; RV32I-WITHFP-NEXT: li a5, 19 -; RV32I-WITHFP-NEXT: li a6, 18 -; RV32I-WITHFP-NEXT: li a7, 17 -; RV32I-WITHFP-NEXT: li t0, 16 -; RV32I-WITHFP-NEXT: li t1, 15 -; RV32I-WITHFP-NEXT: lui t2, 262153 -; RV32I-WITHFP-NEXT: lui t3, 545260 -; RV32I-WITHFP-NEXT: lui t4, 964690 -; RV32I-WITHFP-NEXT: lui t5, 335544 -; RV32I-WITHFP-NEXT: lui t6, 688509 +; RV32I-WITHFP-NEXT: li a4, 19 +; RV32I-WITHFP-NEXT: li a5, 18 +; RV32I-WITHFP-NEXT: li a6, 17 +; RV32I-WITHFP-NEXT: li a7, 16 +; RV32I-WITHFP-NEXT: li t0, 15 +; RV32I-WITHFP-NEXT: lui t1, 262153 +; RV32I-WITHFP-NEXT: lui t2, 545260 +; RV32I-WITHFP-NEXT: lui t3, 964690 +; RV32I-WITHFP-NEXT: lui t4, 335544 +; RV32I-WITHFP-NEXT: lui t5, 688509 ; RV32I-WITHFP-NEXT: li a0, 1 ; RV32I-WITHFP-NEXT: li a1, 11 ; RV32I-WITHFP-NEXT: addi a2, s0, -32 ; RV32I-WITHFP-NEXT: li a3, 12 +; RV32I-WITHFP-NEXT: sw a5, 20(sp) +; RV32I-WITHFP-NEXT: sw a4, 24(sp) ; RV32I-WITHFP-NEXT: li a4, 13 -; RV32I-WITHFP-NEXT: sw a6, 20(sp) -; RV32I-WITHFP-NEXT: sw a5, 24(sp) -; RV32I-WITHFP-NEXT: li a6, 4 -; RV32I-WITHFP-NEXT: sw t1, 0(sp) -; RV32I-WITHFP-NEXT: sw t0, 8(sp) +; RV32I-WITHFP-NEXT: sw t0, 0(sp) +; RV32I-WITHFP-NEXT: sw a7, 8(sp) ; RV32I-WITHFP-NEXT: sw zero, 12(sp) -; RV32I-WITHFP-NEXT: sw a7, 16(sp) +; RV32I-WITHFP-NEXT: sw a6, 16(sp) +; RV32I-WITHFP-NEXT: li a6, 4 +; RV32I-WITHFP-NEXT: addi a7, t1, 491 +; RV32I-WITHFP-NEXT: addi t0, t2, -1967 +; RV32I-WITHFP-NEXT: addi t1, t3, -328 +; RV32I-WITHFP-NEXT: addi t2, t4, 1311 +; RV32I-WITHFP-NEXT: addi a5, t5, -2048 +; RV32I-WITHFP-NEXT: sw t2, -32(s0) +; RV32I-WITHFP-NEXT: sw t1, -28(s0) +; RV32I-WITHFP-NEXT: sw t0, -24(s0) +; RV32I-WITHFP-NEXT: sw a7, -20(s0) ; RV32I-WITHFP-NEXT: li a7, 14 -; RV32I-WITHFP-NEXT: addi t0, t2, 491 -; RV32I-WITHFP-NEXT: addi t1, t3, -1967 -; RV32I-WITHFP-NEXT: addi t2, t4, -328 -; RV32I-WITHFP-NEXT: addi t3, t5, 1311 -; RV32I-WITHFP-NEXT: addi a5, t6, -2048 -; RV32I-WITHFP-NEXT: sw t3, -32(s0) -; RV32I-WITHFP-NEXT: sw t2, -28(s0) -; RV32I-WITHFP-NEXT: sw t1, -24(s0) -; RV32I-WITHFP-NEXT: sw t0, -20(s0) ; RV32I-WITHFP-NEXT: call callee_aligned_stack ; RV32I-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32I-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll index 1dac139503ba7..5e37c83d30ba8 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32.ll @@ -111,8 +111,8 @@ define i32 @caller_float_on_stack() nounwind { ; RV32I-FPELIM-NEXT: li a0, 1 ; RV32I-FPELIM-NEXT: li a2, 2 ; RV32I-FPELIM-NEXT: li a4, 3 -; RV32I-FPELIM-NEXT: li a6, 4 ; RV32I-FPELIM-NEXT: sw a1, 0(sp) +; RV32I-FPELIM-NEXT: li a6, 4 ; RV32I-FPELIM-NEXT: li a1, 0 ; RV32I-FPELIM-NEXT: li a3, 0 ; RV32I-FPELIM-NEXT: li a5, 0 @@ -132,8 +132,8 @@ define i32 @caller_float_on_stack() nounwind { ; RV32I-WITHFP-NEXT: li a0, 1 ; RV32I-WITHFP-NEXT: li a2, 2 ; RV32I-WITHFP-NEXT: li a4, 3 -; RV32I-WITHFP-NEXT: li a6, 4 ; RV32I-WITHFP-NEXT: sw a1, 0(sp) +; RV32I-WITHFP-NEXT: li a6, 4 ; RV32I-WITHFP-NEXT: li a1, 0 ; RV32I-WITHFP-NEXT: li a3, 0 ; RV32I-WITHFP-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll index 7630d5b8f77ef..3ae76de6a65f7 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32d.ll @@ -51,14 +51,14 @@ define i32 @caller_double_in_fpr_exhausted_gprs() nounwind { ; RV32-ILP32D: # %bb.0: ; RV32-ILP32D-NEXT: 
addi sp, sp, -16 ; RV32-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32D-NEXT: li a1, 5 -; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI3_0) -; RV32-ILP32D-NEXT: fld fa0, %lo(.LCPI3_0)(a0) +; RV32-ILP32D-NEXT: li a0, 5 +; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI3_0) +; RV32-ILP32D-NEXT: sw a0, 0(sp) +; RV32-ILP32D-NEXT: fld fa0, %lo(.LCPI3_0)(a1) ; RV32-ILP32D-NEXT: li a0, 1 ; RV32-ILP32D-NEXT: li a2, 2 ; RV32-ILP32D-NEXT: li a4, 3 ; RV32-ILP32D-NEXT: li a6, 4 -; RV32-ILP32D-NEXT: sw a1, 0(sp) ; RV32-ILP32D-NEXT: li a1, 0 ; RV32-ILP32D-NEXT: li a3, 0 ; RV32-ILP32D-NEXT: li a5, 0 @@ -147,16 +147,17 @@ define i32 @caller_double_in_gpr_and_stack_almost_exhausted_gprs_fprs() nounwind ; RV32-ILP32D: # %bb.0: ; RV32-ILP32D-NEXT: addi sp, sp, -16 ; RV32-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32D-NEXT: lui a1, 262816 -; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_0) +; RV32-ILP32D-NEXT: lui a0, 262816 +; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI7_0) ; RV32-ILP32D-NEXT: lui a2, %hi(.LCPI7_1) ; RV32-ILP32D-NEXT: lui a3, %hi(.LCPI7_2) ; RV32-ILP32D-NEXT: lui a4, %hi(.LCPI7_3) ; RV32-ILP32D-NEXT: lui a5, %hi(.LCPI7_4) ; RV32-ILP32D-NEXT: lui a6, %hi(.LCPI7_5) ; RV32-ILP32D-NEXT: lui a7, %hi(.LCPI7_6) -; RV32-ILP32D-NEXT: fld fa0, %lo(.LCPI7_0)(a0) +; RV32-ILP32D-NEXT: sw a0, 0(sp) ; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI7_7) +; RV32-ILP32D-NEXT: fld fa0, %lo(.LCPI7_0)(a1) ; RV32-ILP32D-NEXT: fld fa1, %lo(.LCPI7_1)(a2) ; RV32-ILP32D-NEXT: fld fa2, %lo(.LCPI7_2)(a3) ; RV32-ILP32D-NEXT: fld fa3, %lo(.LCPI7_3)(a4) @@ -168,7 +169,6 @@ define i32 @caller_double_in_gpr_and_stack_almost_exhausted_gprs_fprs() nounwind ; RV32-ILP32D-NEXT: li a2, 3 ; RV32-ILP32D-NEXT: li a4, 5 ; RV32-ILP32D-NEXT: li a6, 7 -; RV32-ILP32D-NEXT: sw a1, 0(sp) ; RV32-ILP32D-NEXT: li a1, 0 ; RV32-ILP32D-NEXT: li a3, 0 ; RV32-ILP32D-NEXT: li a5, 0 @@ -203,29 +203,29 @@ define i32 @caller_double_on_stack_exhausted_gprs_fprs() nounwind { ; RV32-ILP32D: # %bb.0: ; RV32-ILP32D-NEXT: addi sp, sp, -16 ; RV32-ILP32D-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32D-NEXT: lui a1, 262816 -; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_0) +; RV32-ILP32D-NEXT: lui a0, 262816 +; RV32-ILP32D-NEXT: lui a1, %hi(.LCPI9_0) ; RV32-ILP32D-NEXT: lui a2, %hi(.LCPI9_1) ; RV32-ILP32D-NEXT: lui a3, %hi(.LCPI9_2) ; RV32-ILP32D-NEXT: lui a4, %hi(.LCPI9_3) ; RV32-ILP32D-NEXT: lui a5, %hi(.LCPI9_4) ; RV32-ILP32D-NEXT: lui a6, %hi(.LCPI9_5) ; RV32-ILP32D-NEXT: lui a7, %hi(.LCPI9_6) -; RV32-ILP32D-NEXT: fld fa0, %lo(.LCPI9_0)(a0) -; RV32-ILP32D-NEXT: lui t0, %hi(.LCPI9_7) +; RV32-ILP32D-NEXT: sw zero, 0(sp) +; RV32-ILP32D-NEXT: sw a0, 4(sp) +; RV32-ILP32D-NEXT: lui a0, %hi(.LCPI9_7) +; RV32-ILP32D-NEXT: fld fa0, %lo(.LCPI9_0)(a1) ; RV32-ILP32D-NEXT: fld fa1, %lo(.LCPI9_1)(a2) -; RV32-ILP32D-NEXT: li a0, 1 ; RV32-ILP32D-NEXT: fld fa2, %lo(.LCPI9_2)(a3) ; RV32-ILP32D-NEXT: fld fa3, %lo(.LCPI9_3)(a4) ; RV32-ILP32D-NEXT: fld fa4, %lo(.LCPI9_4)(a5) ; RV32-ILP32D-NEXT: fld fa5, %lo(.LCPI9_5)(a6) ; RV32-ILP32D-NEXT: fld fa6, %lo(.LCPI9_6)(a7) -; RV32-ILP32D-NEXT: fld fa7, %lo(.LCPI9_7)(t0) +; RV32-ILP32D-NEXT: fld fa7, %lo(.LCPI9_7)(a0) +; RV32-ILP32D-NEXT: li a0, 1 ; RV32-ILP32D-NEXT: li a2, 3 ; RV32-ILP32D-NEXT: li a4, 5 ; RV32-ILP32D-NEXT: li a6, 7 -; RV32-ILP32D-NEXT: sw zero, 0(sp) -; RV32-ILP32D-NEXT: sw a1, 4(sp) ; RV32-ILP32D-NEXT: li a1, 0 ; RV32-ILP32D-NEXT: li a3, 0 ; RV32-ILP32D-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll index e16bed5400300..51def89ed6c3a 100644 --- 
a/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32e.ll @@ -224,10 +224,10 @@ define i32 @caller_float_on_stack() { ; ILP32E-FPELIM-NEXT: li a3, 4 ; ILP32E-FPELIM-NEXT: li a0, 1 ; ILP32E-FPELIM-NEXT: li a2, 2 -; ILP32E-FPELIM-NEXT: li a4, 3 ; ILP32E-FPELIM-NEXT: sw a3, 0(sp) ; ILP32E-FPELIM-NEXT: sw zero, 4(sp) ; ILP32E-FPELIM-NEXT: sw a1, 8(sp) +; ILP32E-FPELIM-NEXT: li a4, 3 ; ILP32E-FPELIM-NEXT: li a1, 0 ; ILP32E-FPELIM-NEXT: li a3, 0 ; ILP32E-FPELIM-NEXT: li a5, 0 @@ -252,10 +252,10 @@ define i32 @caller_float_on_stack() { ; ILP32E-WITHFP-NEXT: li a3, 4 ; ILP32E-WITHFP-NEXT: li a0, 1 ; ILP32E-WITHFP-NEXT: li a2, 2 -; ILP32E-WITHFP-NEXT: li a4, 3 ; ILP32E-WITHFP-NEXT: sw a3, 0(sp) ; ILP32E-WITHFP-NEXT: sw zero, 4(sp) ; ILP32E-WITHFP-NEXT: sw a1, 8(sp) +; ILP32E-WITHFP-NEXT: li a4, 3 ; ILP32E-WITHFP-NEXT: li a1, 0 ; ILP32E-WITHFP-NEXT: li a3, 0 ; ILP32E-WITHFP-NEXT: li a5, 0 @@ -280,10 +280,10 @@ define i32 @caller_float_on_stack() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 2 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 3 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 0(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 3 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 0 @@ -306,10 +306,10 @@ define i32 @caller_float_on_stack() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 2 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 3 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 0(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 3 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 0 @@ -589,16 +589,16 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; ILP32E-FPELIM-LABEL: callee_aligned_stack: ; ILP32E-FPELIM: # %bb.0: ; ILP32E-FPELIM-NEXT: lw a0, 0(a2) -; ILP32E-FPELIM-NEXT: lw a1, 12(sp) -; ILP32E-FPELIM-NEXT: lw a2, 4(sp) +; ILP32E-FPELIM-NEXT: lw a1, 24(sp) +; ILP32E-FPELIM-NEXT: lw a2, 12(sp) ; ILP32E-FPELIM-NEXT: lw a3, 8(sp) -; ILP32E-FPELIM-NEXT: lw a4, 24(sp) +; ILP32E-FPELIM-NEXT: lw a4, 4(sp) ; ILP32E-FPELIM-NEXT: lw a5, 20(sp) +; ILP32E-FPELIM-NEXT: add a0, a0, a4 +; ILP32E-FPELIM-NEXT: add a2, a3, a2 ; ILP32E-FPELIM-NEXT: add a0, a0, a2 -; ILP32E-FPELIM-NEXT: add a1, a3, a1 +; ILP32E-FPELIM-NEXT: add a1, a5, a1 ; ILP32E-FPELIM-NEXT: add a0, a0, a1 -; ILP32E-FPELIM-NEXT: add a4, a5, a4 -; ILP32E-FPELIM-NEXT: add a0, a0, a4 ; ILP32E-FPELIM-NEXT: ret ; ; ILP32E-WITHFP-LABEL: callee_aligned_stack: @@ -612,16 +612,16 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; ILP32E-WITHFP-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: lw a0, 0(a2) -; ILP32E-WITHFP-NEXT: lw a1, 12(s0) -; ILP32E-WITHFP-NEXT: lw a2, 4(s0) +; ILP32E-WITHFP-NEXT: lw a1, 24(s0) +; ILP32E-WITHFP-NEXT: lw a2, 12(s0) ; ILP32E-WITHFP-NEXT: lw a3, 8(s0) -; ILP32E-WITHFP-NEXT: lw a4, 24(s0) +; ILP32E-WITHFP-NEXT: lw a4, 4(s0) ; ILP32E-WITHFP-NEXT: lw a5, 20(s0) +; ILP32E-WITHFP-NEXT: add a0, a0, a4 +; ILP32E-WITHFP-NEXT: add a2, a3, a2 ; ILP32E-WITHFP-NEXT: add a0, a0, a2 -; 
ILP32E-WITHFP-NEXT: add a1, a3, a1 +; ILP32E-WITHFP-NEXT: add a1, a5, a1 ; ILP32E-WITHFP-NEXT: add a0, a0, a1 -; ILP32E-WITHFP-NEXT: add a4, a5, a4 -; ILP32E-WITHFP-NEXT: add a0, a0, a4 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa sp, 8 ; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload ; ILP32E-WITHFP-NEXT: lw s0, 0(sp) # 4-byte Folded Reload @@ -634,16 +634,16 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; ILP32E-FPELIM-SAVE-RESTORE-LABEL: callee_aligned_stack: ; ILP32E-FPELIM-SAVE-RESTORE: # %bb.0: ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(a2) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a2, 12(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 4(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a2, a3, a2 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a2 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a1, a3, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a1, a5, a1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a4, a5, a4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: add a0, a0, a4 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret ; ; ILP32E-WITHFP-SAVE-RESTORE-LABEL: callee_aligned_stack: @@ -655,16 +655,16 @@ define i32 @callee_aligned_stack(i32 %a, i32 %b, fp128 %c, i32 %d, i32 %e, i64 % ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(a2) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(s0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 4(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 24(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a2, 12(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 8(s0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 24(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 4(s0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 20(s0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a2, a3, a2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a2 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a1, a3, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a1, a5, a1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a4, a5, a4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: add a0, a0, a4 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa sp, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 %1 = bitcast fp128 %c to i128 @@ -694,43 +694,43 @@ define void @caller_aligned_stack() { ; ILP32E-FPELIM-NEXT: addi s0, sp, 64 ; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-NEXT: li a3, 18 -; ILP32E-FPELIM-NEXT: li a4, 17 -; ILP32E-FPELIM-NEXT: li a5, 16 -; ILP32E-FPELIM-NEXT: lui a6, 262236 -; ILP32E-FPELIM-NEXT: lui a7, 377487 -; ILP32E-FPELIM-NEXT: li t0, 15 -; ILP32E-FPELIM-NEXT: li t1, 14 -; ILP32E-FPELIM-NEXT: li t2, 4 -; ILP32E-FPELIM-NEXT: lui t3, 262153 -; ILP32E-FPELIM-NEXT: lui t4, 545260 -; ILP32E-FPELIM-NEXT: lui t5, 964690 -; ILP32E-FPELIM-NEXT: lui t6, 335544 -; ILP32E-FPELIM-NEXT: lui s2, 688509 +; ILP32E-FPELIM-NEXT: li a2, 18 +; ILP32E-FPELIM-NEXT: li a3, 17 +; ILP32E-FPELIM-NEXT: li a4, 16 +; ILP32E-FPELIM-NEXT: lui a5, 262236 +; ILP32E-FPELIM-NEXT: lui a6, 377487 +; ILP32E-FPELIM-NEXT: li a7, 15 +; 
ILP32E-FPELIM-NEXT: li t0, 14 +; ILP32E-FPELIM-NEXT: li t1, 4 +; ILP32E-FPELIM-NEXT: lui t2, 262153 +; ILP32E-FPELIM-NEXT: lui t3, 545260 +; ILP32E-FPELIM-NEXT: lui t4, 964690 +; ILP32E-FPELIM-NEXT: lui t5, 335544 +; ILP32E-FPELIM-NEXT: lui t6, 688509 ; ILP32E-FPELIM-NEXT: li a0, 1 ; ILP32E-FPELIM-NEXT: li a1, 11 +; ILP32E-FPELIM-NEXT: addi a5, a5, 655 +; ILP32E-FPELIM-NEXT: sw a5, 16(sp) +; ILP32E-FPELIM-NEXT: sw a4, 20(sp) +; ILP32E-FPELIM-NEXT: sw a3, 24(sp) +; ILP32E-FPELIM-NEXT: sw a2, 28(sp) ; ILP32E-FPELIM-NEXT: addi a2, sp, 32 -; ILP32E-FPELIM-NEXT: addi a6, a6, 655 -; ILP32E-FPELIM-NEXT: sw a6, 16(sp) -; ILP32E-FPELIM-NEXT: sw a5, 20(sp) -; ILP32E-FPELIM-NEXT: sw a4, 24(sp) -; ILP32E-FPELIM-NEXT: sw a3, 28(sp) +; ILP32E-FPELIM-NEXT: addi a3, a6, 1475 +; ILP32E-FPELIM-NEXT: sw t1, 0(sp) +; ILP32E-FPELIM-NEXT: sw t0, 4(sp) +; ILP32E-FPELIM-NEXT: sw a7, 8(sp) +; ILP32E-FPELIM-NEXT: sw a3, 12(sp) ; ILP32E-FPELIM-NEXT: li a3, 12 -; ILP32E-FPELIM-NEXT: addi a4, a7, 1475 -; ILP32E-FPELIM-NEXT: sw t2, 0(sp) -; ILP32E-FPELIM-NEXT: sw t1, 4(sp) -; ILP32E-FPELIM-NEXT: sw t0, 8(sp) -; ILP32E-FPELIM-NEXT: sw a4, 12(sp) +; ILP32E-FPELIM-NEXT: addi a4, t2, 491 +; ILP32E-FPELIM-NEXT: addi a6, t3, -1967 +; ILP32E-FPELIM-NEXT: addi a7, t4, -328 +; ILP32E-FPELIM-NEXT: addi t0, t5, 1311 +; ILP32E-FPELIM-NEXT: addi a5, t6, -2048 +; ILP32E-FPELIM-NEXT: sw t0, 32(sp) +; ILP32E-FPELIM-NEXT: sw a7, 36(sp) +; ILP32E-FPELIM-NEXT: sw a6, 40(sp) +; ILP32E-FPELIM-NEXT: sw a4, 44(sp) ; ILP32E-FPELIM-NEXT: li a4, 13 -; ILP32E-FPELIM-NEXT: addi a6, t3, 491 -; ILP32E-FPELIM-NEXT: addi a7, t4, -1967 -; ILP32E-FPELIM-NEXT: addi t0, t5, -328 -; ILP32E-FPELIM-NEXT: addi t1, t6, 1311 -; ILP32E-FPELIM-NEXT: addi a5, s2, -2048 -; ILP32E-FPELIM-NEXT: sw t1, 32(sp) -; ILP32E-FPELIM-NEXT: sw t0, 36(sp) -; ILP32E-FPELIM-NEXT: sw a7, 40(sp) -; ILP32E-FPELIM-NEXT: sw a6, 44(sp) ; ILP32E-FPELIM-NEXT: call callee_aligned_stack ; ILP32E-FPELIM-NEXT: addi sp, s0, -64 ; ILP32E-FPELIM-NEXT: .cfi_def_cfa sp, 64 @@ -753,43 +753,43 @@ define void @caller_aligned_stack() { ; ILP32E-WITHFP-NEXT: addi s0, sp, 64 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-NEXT: li a3, 18 -; ILP32E-WITHFP-NEXT: li a4, 17 -; ILP32E-WITHFP-NEXT: li a5, 16 -; ILP32E-WITHFP-NEXT: lui a6, 262236 -; ILP32E-WITHFP-NEXT: lui a7, 377487 -; ILP32E-WITHFP-NEXT: li t0, 15 -; ILP32E-WITHFP-NEXT: li t1, 14 -; ILP32E-WITHFP-NEXT: li t2, 4 -; ILP32E-WITHFP-NEXT: lui t3, 262153 -; ILP32E-WITHFP-NEXT: lui t4, 545260 -; ILP32E-WITHFP-NEXT: lui t5, 964690 -; ILP32E-WITHFP-NEXT: lui t6, 335544 -; ILP32E-WITHFP-NEXT: lui s2, 688509 +; ILP32E-WITHFP-NEXT: li a2, 18 +; ILP32E-WITHFP-NEXT: li a3, 17 +; ILP32E-WITHFP-NEXT: li a4, 16 +; ILP32E-WITHFP-NEXT: lui a5, 262236 +; ILP32E-WITHFP-NEXT: lui a6, 377487 +; ILP32E-WITHFP-NEXT: li a7, 15 +; ILP32E-WITHFP-NEXT: li t0, 14 +; ILP32E-WITHFP-NEXT: li t1, 4 +; ILP32E-WITHFP-NEXT: lui t2, 262153 +; ILP32E-WITHFP-NEXT: lui t3, 545260 +; ILP32E-WITHFP-NEXT: lui t4, 964690 +; ILP32E-WITHFP-NEXT: lui t5, 335544 +; ILP32E-WITHFP-NEXT: lui t6, 688509 ; ILP32E-WITHFP-NEXT: li a0, 1 ; ILP32E-WITHFP-NEXT: li a1, 11 +; ILP32E-WITHFP-NEXT: addi a5, a5, 655 +; ILP32E-WITHFP-NEXT: sw a5, 16(sp) +; ILP32E-WITHFP-NEXT: sw a4, 20(sp) +; ILP32E-WITHFP-NEXT: sw a3, 24(sp) +; ILP32E-WITHFP-NEXT: sw a2, 28(sp) ; ILP32E-WITHFP-NEXT: addi a2, sp, 32 -; ILP32E-WITHFP-NEXT: addi a6, a6, 655 -; ILP32E-WITHFP-NEXT: sw a6, 16(sp) -; ILP32E-WITHFP-NEXT: sw a5, 20(sp) -; ILP32E-WITHFP-NEXT: sw a4, 24(sp) -; 
ILP32E-WITHFP-NEXT: sw a3, 28(sp) +; ILP32E-WITHFP-NEXT: addi a3, a6, 1475 +; ILP32E-WITHFP-NEXT: sw t1, 0(sp) +; ILP32E-WITHFP-NEXT: sw t0, 4(sp) +; ILP32E-WITHFP-NEXT: sw a7, 8(sp) +; ILP32E-WITHFP-NEXT: sw a3, 12(sp) ; ILP32E-WITHFP-NEXT: li a3, 12 -; ILP32E-WITHFP-NEXT: addi a4, a7, 1475 -; ILP32E-WITHFP-NEXT: sw t2, 0(sp) -; ILP32E-WITHFP-NEXT: sw t1, 4(sp) -; ILP32E-WITHFP-NEXT: sw t0, 8(sp) -; ILP32E-WITHFP-NEXT: sw a4, 12(sp) +; ILP32E-WITHFP-NEXT: addi a4, t2, 491 +; ILP32E-WITHFP-NEXT: addi a6, t3, -1967 +; ILP32E-WITHFP-NEXT: addi a7, t4, -328 +; ILP32E-WITHFP-NEXT: addi t0, t5, 1311 +; ILP32E-WITHFP-NEXT: addi a5, t6, -2048 +; ILP32E-WITHFP-NEXT: sw t0, 32(sp) +; ILP32E-WITHFP-NEXT: sw a7, 36(sp) +; ILP32E-WITHFP-NEXT: sw a6, 40(sp) +; ILP32E-WITHFP-NEXT: sw a4, 44(sp) ; ILP32E-WITHFP-NEXT: li a4, 13 -; ILP32E-WITHFP-NEXT: addi a6, t3, 491 -; ILP32E-WITHFP-NEXT: addi a7, t4, -1967 -; ILP32E-WITHFP-NEXT: addi t0, t5, -328 -; ILP32E-WITHFP-NEXT: addi t1, t6, 1311 -; ILP32E-WITHFP-NEXT: addi a5, s2, -2048 -; ILP32E-WITHFP-NEXT: sw t1, 32(sp) -; ILP32E-WITHFP-NEXT: sw t0, 36(sp) -; ILP32E-WITHFP-NEXT: sw a7, 40(sp) -; ILP32E-WITHFP-NEXT: sw a6, 44(sp) ; ILP32E-WITHFP-NEXT: call callee_aligned_stack ; ILP32E-WITHFP-NEXT: addi sp, s0, -64 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa sp, 64 @@ -812,43 +812,43 @@ define void @caller_aligned_stack() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 64 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 18 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 17 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a6, 262236 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a7, 377487 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li t0, 15 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li t1, 14 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li t2, 4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t3, 262153 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t4, 545260 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t5, 964690 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t6, 335544 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui s2, 688509 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 18 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 17 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a5, 262236 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a6, 377487 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a7, 15 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li t0, 14 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li t1, 4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t2, 262153 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t3, 545260 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t4, 964690 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t5, 335544 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t6, 688509 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 11 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a5, a5, 655 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a5, 16(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 20(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 28(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a2, sp, 32 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, a6, 655 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 16(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a5, 20(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 24(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a3, a6, 1475 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t1, 0(sp) +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t0, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a7, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 12(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 12 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a4, a7, 1475 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t2, 0(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t1, 4(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t0, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a4, t2, 491 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, t3, -1967 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a7, t4, -328 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi t0, t5, 1311 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a5, t6, -2048 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t0, 32(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a7, 36(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 40(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 44(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 13 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, t3, 491 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a7, t4, -1967 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi t0, t5, -328 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi t1, t6, 1311 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a5, s2, -2048 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t1, 32(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t0, 36(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a7, 40(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 44(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_aligned_stack ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -64 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa sp, 64 @@ -867,43 +867,43 @@ define void @caller_aligned_stack() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 64 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 18 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 17 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a6, 262236 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a7, 377487 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li t0, 15 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li t1, 14 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li t2, 4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t3, 262153 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t4, 545260 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t5, 964690 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t6, 335544 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui s2, 688509 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 18 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 17 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a5, 262236 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a6, 377487 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a7, 15 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li t0, 14 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li t1, 4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t2, 262153 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t3, 545260 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t4, 964690 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t5, 335544 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t6, 688509 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 11 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a5, a5, 655 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a5, 16(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 20(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 24(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 28(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a2, sp, 32 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, a6, 655 -; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 16(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a5, 20(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 24(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a3, a6, 1475 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t1, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t0, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a7, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 12(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 12 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a4, a7, 1475 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t2, 0(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t1, 4(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t0, 8(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a4, t2, 491 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, t3, -1967 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a7, t4, -328 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi t0, t5, 1311 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a5, t6, -2048 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t0, 32(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a7, 36(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 40(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 44(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 13 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, t3, 491 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a7, t4, -1967 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi t0, t5, -328 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi t1, t6, 1311 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a5, s2, -2048 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t1, 32(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t0, 36(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a7, 40(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 44(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_aligned_stack ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -64 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa sp, 64 @@ -1272,17 +1272,17 @@ define i32 @caller_many_scalars() { ; ILP32E-FPELIM-NEXT: sw ra, 16(sp) # 4-byte Folded Spill ; ILP32E-FPELIM-NEXT: .cfi_offset ra, -4 ; ILP32E-FPELIM-NEXT: li a4, 8 -; ILP32E-FPELIM-NEXT: li a6, 7 -; ILP32E-FPELIM-NEXT: li a7, 6 +; ILP32E-FPELIM-NEXT: li a5, 7 +; ILP32E-FPELIM-NEXT: li a6, 6 ; ILP32E-FPELIM-NEXT: li a0, 1 ; ILP32E-FPELIM-NEXT: li a1, 2 ; ILP32E-FPELIM-NEXT: li a2, 3 ; ILP32E-FPELIM-NEXT: li a3, 4 -; ILP32E-FPELIM-NEXT: li a5, 5 -; ILP32E-FPELIM-NEXT: sw a7, 0(sp) -; ILP32E-FPELIM-NEXT: sw a6, 4(sp) +; ILP32E-FPELIM-NEXT: sw a6, 0(sp) +; ILP32E-FPELIM-NEXT: sw a5, 4(sp) ; ILP32E-FPELIM-NEXT: sw zero, 8(sp) ; ILP32E-FPELIM-NEXT: sw a4, 12(sp) +; ILP32E-FPELIM-NEXT: li a5, 5 ; ILP32E-FPELIM-NEXT: li a4, 0 ; ILP32E-FPELIM-NEXT: call callee_many_scalars ; ILP32E-FPELIM-NEXT: lw ra, 16(sp) # 4-byte Folded Reload @@ -1302,17 +1302,17 @@ define i32 @caller_many_scalars() { ; ILP32E-WITHFP-NEXT: addi s0, sp, 24 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: li a4, 8 -; ILP32E-WITHFP-NEXT: li a6, 7 -; ILP32E-WITHFP-NEXT: li a7, 6 +; ILP32E-WITHFP-NEXT: li a5, 7 +; ILP32E-WITHFP-NEXT: li a6, 6 ; ILP32E-WITHFP-NEXT: li a0, 1 ; ILP32E-WITHFP-NEXT: li a1, 2 ; ILP32E-WITHFP-NEXT: li a2, 3 ; ILP32E-WITHFP-NEXT: li a3, 4 -; ILP32E-WITHFP-NEXT: li a5, 5 -; ILP32E-WITHFP-NEXT: sw a7, 0(sp) -; ILP32E-WITHFP-NEXT: sw a6, 4(sp) +; ILP32E-WITHFP-NEXT: sw a6, 0(sp) +; ILP32E-WITHFP-NEXT: sw a5, 4(sp) ; ILP32E-WITHFP-NEXT: sw zero, 8(sp) ; ILP32E-WITHFP-NEXT: sw a4, 12(sp) +; ILP32E-WITHFP-NEXT: li a5, 5 ; ILP32E-WITHFP-NEXT: li a4, 0 ; ILP32E-WITHFP-NEXT: call callee_many_scalars ; 
ILP32E-WITHFP-NEXT: .cfi_def_cfa sp, 24 @@ -1332,17 +1332,17 @@ define i32 @caller_many_scalars() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, -16 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa_offset 20 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 8 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a6, 7 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a7, 6 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a6, 6 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 5 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a7, 0(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a5, 4(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 5 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_many_scalars ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, sp, 16 @@ -1360,17 +1360,17 @@ define i32 @caller_many_scalars() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 24 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 8 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a6, 7 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a7, 6 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a6, 6 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 5 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a7, 0(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a5, 4(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 5 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_many_scalars ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa sp, 24 @@ -1390,17 +1390,17 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) { ; ILP32E-FPELIM-NEXT: lw a3, 4(a1) ; ILP32E-FPELIM-NEXT: lw a4, 8(a1) ; ILP32E-FPELIM-NEXT: lw a1, 12(a1) -; ILP32E-FPELIM-NEXT: lw a5, 12(a0) +; ILP32E-FPELIM-NEXT: lw a5, 0(a0) ; ILP32E-FPELIM-NEXT: lw a6, 4(a0) ; ILP32E-FPELIM-NEXT: lw a7, 8(a0) -; ILP32E-FPELIM-NEXT: lw a0, 0(a0) -; ILP32E-FPELIM-NEXT: xor a1, a5, a1 -; ILP32E-FPELIM-NEXT: xor a3, a6, a3 -; ILP32E-FPELIM-NEXT: xor a4, a7, a4 -; ILP32E-FPELIM-NEXT: xor a0, a0, a2 -; ILP32E-FPELIM-NEXT: or a1, a3, a1 -; ILP32E-FPELIM-NEXT: or a0, a0, a4 -; ILP32E-FPELIM-NEXT: or a0, a0, a1 +; ILP32E-FPELIM-NEXT: lw a0, 12(a0) +; ILP32E-FPELIM-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-NEXT: xor a1, a6, a3 +; ILP32E-FPELIM-NEXT: xor a3, a7, a4 +; ILP32E-FPELIM-NEXT: xor a2, a5, a2 +; ILP32E-FPELIM-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-NEXT: or a2, a2, a3 +; ILP32E-FPELIM-NEXT: or a0, a2, a0 ; ILP32E-FPELIM-NEXT: seqz a0, a0 ; ILP32E-FPELIM-NEXT: ret ; @@ -1418,17 +1418,17 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) { ; ILP32E-WITHFP-NEXT: lw a3, 4(a1) ; ILP32E-WITHFP-NEXT: lw a4, 8(a1) ; ILP32E-WITHFP-NEXT: lw a1, 12(a1) -; ILP32E-WITHFP-NEXT: lw a5, 12(a0) +; ILP32E-WITHFP-NEXT: lw a5, 0(a0) ; ILP32E-WITHFP-NEXT: lw a6, 4(a0) ; 
ILP32E-WITHFP-NEXT: lw a7, 8(a0) -; ILP32E-WITHFP-NEXT: lw a0, 0(a0) -; ILP32E-WITHFP-NEXT: xor a1, a5, a1 -; ILP32E-WITHFP-NEXT: xor a3, a6, a3 -; ILP32E-WITHFP-NEXT: xor a4, a7, a4 -; ILP32E-WITHFP-NEXT: xor a0, a0, a2 -; ILP32E-WITHFP-NEXT: or a1, a3, a1 -; ILP32E-WITHFP-NEXT: or a0, a0, a4 -; ILP32E-WITHFP-NEXT: or a0, a0, a1 +; ILP32E-WITHFP-NEXT: lw a0, 12(a0) +; ILP32E-WITHFP-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-NEXT: xor a1, a6, a3 +; ILP32E-WITHFP-NEXT: xor a3, a7, a4 +; ILP32E-WITHFP-NEXT: xor a2, a5, a2 +; ILP32E-WITHFP-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-NEXT: or a2, a2, a3 +; ILP32E-WITHFP-NEXT: or a0, a2, a0 ; ILP32E-WITHFP-NEXT: seqz a0, a0 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa sp, 8 ; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload @@ -1445,17 +1445,17 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 4(a1) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 8(a1) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 0(a0) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 4(a0) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 8(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 0(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a5, a1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a3, a6, a3 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a2 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a1, a3, a1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 12(a0) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a0, a1 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a6, a3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a3, a7, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a5, a2 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a2, a2, a3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret ; @@ -1471,17 +1471,17 @@ define i32 @callee_large_scalars(i128 %a, fp128 %b) { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 4(a1) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 8(a1) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 0(a0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 4(a0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 8(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 0(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a5, a1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a3, a6, a3 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a2 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a1, a3, a1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 12(a0) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a0, a1 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a6, a3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a3, a7, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a5, a2 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a2, a2, a3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa sp, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 @@ -1503,18 +1503,18 @@ define i32 @caller_large_scalars() { ; ILP32E-FPELIM-NEXT: addi s0, 
sp, 48 ; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-NEXT: lui a1, 524272 -; ILP32E-FPELIM-NEXT: li a2, 1 -; ILP32E-FPELIM-NEXT: addi a0, sp, 24 +; ILP32E-FPELIM-NEXT: lui a0, 524272 +; ILP32E-FPELIM-NEXT: li a1, 1 ; ILP32E-FPELIM-NEXT: sw zero, 0(sp) ; ILP32E-FPELIM-NEXT: sw zero, 4(sp) ; ILP32E-FPELIM-NEXT: sw zero, 8(sp) -; ILP32E-FPELIM-NEXT: sw a1, 12(sp) -; ILP32E-FPELIM-NEXT: mv a1, sp -; ILP32E-FPELIM-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-NEXT: addi a0, sp, 24 +; ILP32E-FPELIM-NEXT: sw a1, 24(sp) ; ILP32E-FPELIM-NEXT: sw zero, 28(sp) ; ILP32E-FPELIM-NEXT: sw zero, 32(sp) ; ILP32E-FPELIM-NEXT: sw zero, 36(sp) +; ILP32E-FPELIM-NEXT: mv a1, sp ; ILP32E-FPELIM-NEXT: call callee_large_scalars ; ILP32E-FPELIM-NEXT: addi sp, s0, -48 ; ILP32E-FPELIM-NEXT: .cfi_def_cfa sp, 48 @@ -1537,18 +1537,18 @@ define i32 @caller_large_scalars() { ; ILP32E-WITHFP-NEXT: addi s0, sp, 48 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-NEXT: lui a1, 524272 -; ILP32E-WITHFP-NEXT: li a2, 1 -; ILP32E-WITHFP-NEXT: addi a0, sp, 24 +; ILP32E-WITHFP-NEXT: lui a0, 524272 +; ILP32E-WITHFP-NEXT: li a1, 1 ; ILP32E-WITHFP-NEXT: sw zero, 0(sp) ; ILP32E-WITHFP-NEXT: sw zero, 4(sp) ; ILP32E-WITHFP-NEXT: sw zero, 8(sp) -; ILP32E-WITHFP-NEXT: sw a1, 12(sp) -; ILP32E-WITHFP-NEXT: mv a1, sp -; ILP32E-WITHFP-NEXT: sw a2, 24(sp) +; ILP32E-WITHFP-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-NEXT: addi a0, sp, 24 +; ILP32E-WITHFP-NEXT: sw a1, 24(sp) ; ILP32E-WITHFP-NEXT: sw zero, 28(sp) ; ILP32E-WITHFP-NEXT: sw zero, 32(sp) ; ILP32E-WITHFP-NEXT: sw zero, 36(sp) +; ILP32E-WITHFP-NEXT: mv a1, sp ; ILP32E-WITHFP-NEXT: call callee_large_scalars ; ILP32E-WITHFP-NEXT: addi sp, s0, -48 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa sp, 48 @@ -1571,18 +1571,18 @@ define i32 @caller_large_scalars() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 48 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a1, 524272 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 1 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a0, 524272 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 0(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 4(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 12(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, sp -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a2, 24(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a0, sp, 24 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a1, 24(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 28(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 32(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 36(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: mv a1, sp ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -48 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa sp, 48 @@ -1601,18 +1601,18 @@ define i32 @caller_large_scalars() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 48 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a1, 524272 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 1 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a0, 524272 +; 
ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 0(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 4(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 8(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 12(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, sp -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a2, 24(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a0, 12(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a0, sp, 24 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a1, 24(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 28(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 32(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 36(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: mv a1, sp ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -48 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa sp, 48 @@ -1636,17 +1636,17 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; ILP32E-FPELIM-NEXT: lw a3, 4(a0) ; ILP32E-FPELIM-NEXT: lw a4, 8(a0) ; ILP32E-FPELIM-NEXT: lw a0, 12(a0) -; ILP32E-FPELIM-NEXT: lw a5, 12(a1) +; ILP32E-FPELIM-NEXT: lw a5, 0(a1) ; ILP32E-FPELIM-NEXT: lw a6, 4(a1) ; ILP32E-FPELIM-NEXT: lw a7, 8(a1) -; ILP32E-FPELIM-NEXT: lw a1, 0(a1) -; ILP32E-FPELIM-NEXT: xor a0, a5, a0 -; ILP32E-FPELIM-NEXT: xor a3, a6, a3 -; ILP32E-FPELIM-NEXT: xor a4, a7, a4 -; ILP32E-FPELIM-NEXT: xor a1, a1, a2 -; ILP32E-FPELIM-NEXT: or a0, a3, a0 -; ILP32E-FPELIM-NEXT: or a1, a1, a4 +; ILP32E-FPELIM-NEXT: lw a1, 12(a1) +; ILP32E-FPELIM-NEXT: xor a0, a1, a0 +; ILP32E-FPELIM-NEXT: xor a1, a6, a3 +; ILP32E-FPELIM-NEXT: xor a3, a7, a4 +; ILP32E-FPELIM-NEXT: xor a2, a5, a2 ; ILP32E-FPELIM-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-NEXT: or a2, a2, a3 +; ILP32E-FPELIM-NEXT: or a0, a2, a0 ; ILP32E-FPELIM-NEXT: seqz a0, a0 ; ILP32E-FPELIM-NEXT: ret ; @@ -1666,17 +1666,17 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; ILP32E-WITHFP-NEXT: lw a3, 4(a0) ; ILP32E-WITHFP-NEXT: lw a4, 8(a0) ; ILP32E-WITHFP-NEXT: lw a0, 12(a0) -; ILP32E-WITHFP-NEXT: lw a5, 12(a1) +; ILP32E-WITHFP-NEXT: lw a5, 0(a1) ; ILP32E-WITHFP-NEXT: lw a6, 4(a1) ; ILP32E-WITHFP-NEXT: lw a7, 8(a1) -; ILP32E-WITHFP-NEXT: lw a1, 0(a1) -; ILP32E-WITHFP-NEXT: xor a0, a5, a0 -; ILP32E-WITHFP-NEXT: xor a3, a6, a3 -; ILP32E-WITHFP-NEXT: xor a4, a7, a4 -; ILP32E-WITHFP-NEXT: xor a1, a1, a2 -; ILP32E-WITHFP-NEXT: or a0, a3, a0 -; ILP32E-WITHFP-NEXT: or a1, a1, a4 +; ILP32E-WITHFP-NEXT: lw a1, 12(a1) +; ILP32E-WITHFP-NEXT: xor a0, a1, a0 +; ILP32E-WITHFP-NEXT: xor a1, a6, a3 +; ILP32E-WITHFP-NEXT: xor a3, a7, a4 +; ILP32E-WITHFP-NEXT: xor a2, a5, a2 ; ILP32E-WITHFP-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-NEXT: or a2, a2, a3 +; ILP32E-WITHFP-NEXT: or a0, a2, a0 ; ILP32E-WITHFP-NEXT: seqz a0, a0 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa sp, 8 ; ILP32E-WITHFP-NEXT: lw ra, 4(sp) # 4-byte Folded Reload @@ -1695,17 +1695,17 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a3, 4(a0) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a4, 8(a0) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a0, 12(a0) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 12(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a5, 0(a1) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a6, 4(a1) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a7, 8(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 0(a1) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a5, a0 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a3, a6, a3 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a4, a7, a4 -; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a1, a2 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a3, a0 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a1, a1, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lw a1, 12(a1) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a1, a6, a3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a3, a7, a4 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: xor a2, a5, a2 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a1, a0 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a2, a2, a3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: or a0, a2, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: seqz a0, a0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: ret ; @@ -1723,17 +1723,17 @@ define i32 @callee_large_scalars_exhausted_regs(i32 %a, i32 %b, i32 %c, i32 %d, ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a3, 4(a0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a4, 8(a0) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a0, 12(a0) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 12(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a5, 0(a1) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a6, 4(a1) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a7, 8(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 0(a1) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a5, a0 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a3, a6, a3 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a4, a7, a4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a1, a2 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a3, a0 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a1, a1, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lw a1, 12(a1) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a1, a6, a3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a3, a7, a4 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: xor a2, a5, a2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a1, a0 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a2, a2, a3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: or a0, a2, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: seqz a0, a0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa sp, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: tail __riscv_restore_1 @@ -1755,30 +1755,30 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-FPELIM-NEXT: addi s0, sp, 64 ; ILP32E-FPELIM-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-NEXT: addi a4, sp, 16 -; ILP32E-FPELIM-NEXT: li a5, 9 -; ILP32E-FPELIM-NEXT: addi a6, sp, 40 -; ILP32E-FPELIM-NEXT: li a7, 7 -; ILP32E-FPELIM-NEXT: lui t0, 524272 -; ILP32E-FPELIM-NEXT: li t1, 8 +; ILP32E-FPELIM-NEXT: addi a3, sp, 16 +; ILP32E-FPELIM-NEXT: li a4, 9 +; ILP32E-FPELIM-NEXT: addi a5, sp, 40 +; ILP32E-FPELIM-NEXT: li a6, 7 +; ILP32E-FPELIM-NEXT: lui a7, 524272 +; ILP32E-FPELIM-NEXT: li t0, 8 ; ILP32E-FPELIM-NEXT: li a0, 1 ; ILP32E-FPELIM-NEXT: li a1, 2 ; ILP32E-FPELIM-NEXT: li a2, 3 +; ILP32E-FPELIM-NEXT: sw a6, 0(sp) +; ILP32E-FPELIM-NEXT: sw a5, 4(sp) +; ILP32E-FPELIM-NEXT: sw a4, 8(sp) +; ILP32E-FPELIM-NEXT: sw a3, 12(sp) ; ILP32E-FPELIM-NEXT: li a3, 4 -; ILP32E-FPELIM-NEXT: sw a7, 0(sp) -; ILP32E-FPELIM-NEXT: sw a6, 4(sp) -; ILP32E-FPELIM-NEXT: sw a5, 8(sp) -; ILP32E-FPELIM-NEXT: sw a4, 12(sp) -; ILP32E-FPELIM-NEXT: li a4, 5 ; ILP32E-FPELIM-NEXT: sw zero, 16(sp) ; ILP32E-FPELIM-NEXT: sw zero, 20(sp) ; ILP32E-FPELIM-NEXT: sw zero, 24(sp) -; ILP32E-FPELIM-NEXT: sw t0, 28(sp) -; ILP32E-FPELIM-NEXT: li a5, 6 -; ILP32E-FPELIM-NEXT: sw t1, 40(sp) +; ILP32E-FPELIM-NEXT: sw a7, 28(sp) +; ILP32E-FPELIM-NEXT: li a4, 5 +; ILP32E-FPELIM-NEXT: sw t0, 40(sp) ; ILP32E-FPELIM-NEXT: sw zero, 44(sp) ; ILP32E-FPELIM-NEXT: sw zero, 48(sp) ; ILP32E-FPELIM-NEXT: sw zero, 52(sp) +; ILP32E-FPELIM-NEXT: li a5, 6 ; 
ILP32E-FPELIM-NEXT: call callee_large_scalars_exhausted_regs ; ILP32E-FPELIM-NEXT: addi sp, s0, -64 ; ILP32E-FPELIM-NEXT: .cfi_def_cfa sp, 64 @@ -1801,30 +1801,30 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-WITHFP-NEXT: addi s0, sp, 64 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-NEXT: addi a4, sp, 16 -; ILP32E-WITHFP-NEXT: li a5, 9 -; ILP32E-WITHFP-NEXT: addi a6, sp, 40 -; ILP32E-WITHFP-NEXT: li a7, 7 -; ILP32E-WITHFP-NEXT: lui t0, 524272 -; ILP32E-WITHFP-NEXT: li t1, 8 +; ILP32E-WITHFP-NEXT: addi a3, sp, 16 +; ILP32E-WITHFP-NEXT: li a4, 9 +; ILP32E-WITHFP-NEXT: addi a5, sp, 40 +; ILP32E-WITHFP-NEXT: li a6, 7 +; ILP32E-WITHFP-NEXT: lui a7, 524272 +; ILP32E-WITHFP-NEXT: li t0, 8 ; ILP32E-WITHFP-NEXT: li a0, 1 ; ILP32E-WITHFP-NEXT: li a1, 2 ; ILP32E-WITHFP-NEXT: li a2, 3 +; ILP32E-WITHFP-NEXT: sw a6, 0(sp) +; ILP32E-WITHFP-NEXT: sw a5, 4(sp) +; ILP32E-WITHFP-NEXT: sw a4, 8(sp) +; ILP32E-WITHFP-NEXT: sw a3, 12(sp) ; ILP32E-WITHFP-NEXT: li a3, 4 -; ILP32E-WITHFP-NEXT: sw a7, 0(sp) -; ILP32E-WITHFP-NEXT: sw a6, 4(sp) -; ILP32E-WITHFP-NEXT: sw a5, 8(sp) -; ILP32E-WITHFP-NEXT: sw a4, 12(sp) -; ILP32E-WITHFP-NEXT: li a4, 5 ; ILP32E-WITHFP-NEXT: sw zero, 16(sp) ; ILP32E-WITHFP-NEXT: sw zero, 20(sp) ; ILP32E-WITHFP-NEXT: sw zero, 24(sp) -; ILP32E-WITHFP-NEXT: sw t0, 28(sp) -; ILP32E-WITHFP-NEXT: li a5, 6 -; ILP32E-WITHFP-NEXT: sw t1, 40(sp) +; ILP32E-WITHFP-NEXT: sw a7, 28(sp) +; ILP32E-WITHFP-NEXT: li a4, 5 +; ILP32E-WITHFP-NEXT: sw t0, 40(sp) ; ILP32E-WITHFP-NEXT: sw zero, 44(sp) ; ILP32E-WITHFP-NEXT: sw zero, 48(sp) ; ILP32E-WITHFP-NEXT: sw zero, 52(sp) +; ILP32E-WITHFP-NEXT: li a5, 6 ; ILP32E-WITHFP-NEXT: call callee_large_scalars_exhausted_regs ; ILP32E-WITHFP-NEXT: addi sp, s0, -64 ; ILP32E-WITHFP-NEXT: .cfi_def_cfa sp, 64 @@ -1847,30 +1847,30 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi s0, sp, 64 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a4, sp, 16 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 9 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a6, sp, 40 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a7, 7 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui t0, 524272 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li t1, 8 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a3, sp, 16 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 9 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi a5, sp, 40 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a6, 7 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: lui a7, 524272 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li t0, 8 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a1, 2 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 0(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a5, 4(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 8(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a3, 12(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a3, 4 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a7, 0(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a6, 4(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a5, 8(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a4, 12(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 5 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 16(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 20(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 24(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t0, 28(sp) -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 6 -; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t1, 40(sp) +; 
ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw a7, 28(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a4, 5 +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw t0, 40(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 44(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 48(sp) ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: sw zero, 52(sp) +; ILP32E-FPELIM-SAVE-RESTORE-NEXT: li a5, 6 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: addi sp, s0, -64 ; ILP32E-FPELIM-SAVE-RESTORE-NEXT: .cfi_def_cfa sp, 64 @@ -1889,30 +1889,30 @@ define i32 @caller_large_scalars_exhausted_regs() { ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi s0, sp, 64 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa s0, 0 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a4, sp, 16 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 9 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a6, sp, 40 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a7, 7 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui t0, 524272 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li t1, 8 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a3, sp, 16 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 9 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi a5, sp, 40 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a6, 7 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: lui a7, 524272 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li t0, 8 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a0, 1 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a1, 2 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a2, 3 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 0(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a5, 4(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 8(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a3, 12(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a3, 4 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a7, 0(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a6, 4(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a5, 8(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a4, 12(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 5 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 16(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 20(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 24(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t0, 28(sp) -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 6 -; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t1, 40(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw a7, 28(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a4, 5 +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw t0, 40(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 44(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 48(sp) ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: sw zero, 52(sp) +; ILP32E-WITHFP-SAVE-RESTORE-NEXT: li a5, 6 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: call callee_large_scalars_exhausted_regs ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: addi sp, s0, -64 ; ILP32E-WITHFP-SAVE-RESTORE-NEXT: .cfi_def_cfa sp, 64 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-ilp32f-ilp32d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-ilp32f-ilp32d-common.ll index dabd2a7ce9a73..cb98422ebd3ae 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-ilp32f-ilp32d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-ilp32f-ilp32d-common.ll @@ -59,9 +59,9 @@ define i32 @caller_float_in_fpr_exhausted_gprs() nounwind { ; RV32-ILP32FD-NEXT: li a0, 1 ; RV32-ILP32FD-NEXT: li a2, 2 ; RV32-ILP32FD-NEXT: li a4, 3 +; RV32-ILP32FD-NEXT: sw a1, 0(sp) ; RV32-ILP32FD-NEXT: fmv.w.x fa0, a3 ; RV32-ILP32FD-NEXT: li a6, 4 -; RV32-ILP32FD-NEXT: sw a1, 0(sp) ; RV32-ILP32FD-NEXT: li a1, 0 ; RV32-ILP32FD-NEXT: li a3, 0 ; RV32-ILP32FD-NEXT: li a5, 0 @@ -141,28 +141,28 @@ define i32 
@caller_float_on_stack_exhausted_gprs_fprs() nounwind { ; RV32-ILP32FD: # %bb.0: ; RV32-ILP32FD-NEXT: addi sp, sp, -16 ; RV32-ILP32FD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill -; RV32-ILP32FD-NEXT: lui a1, 267520 -; RV32-ILP32FD-NEXT: lui a0, 262144 +; RV32-ILP32FD-NEXT: lui a0, 267520 +; RV32-ILP32FD-NEXT: lui a1, 262144 ; RV32-ILP32FD-NEXT: lui a2, 264192 ; RV32-ILP32FD-NEXT: lui a3, 265216 -; RV32-ILP32FD-NEXT: lui a4, 266240 -; RV32-ILP32FD-NEXT: lui a5, 266496 -; RV32-ILP32FD-NEXT: lui a6, 266752 -; RV32-ILP32FD-NEXT: lui a7, 267008 -; RV32-ILP32FD-NEXT: fmv.w.x fa0, a0 -; RV32-ILP32FD-NEXT: lui t0, 267264 -; RV32-ILP32FD-NEXT: fmv.w.x fa1, a2 +; RV32-ILP32FD-NEXT: lui a5, 266240 +; RV32-ILP32FD-NEXT: lui a6, 266496 +; RV32-ILP32FD-NEXT: lui a7, 266752 +; RV32-ILP32FD-NEXT: lui t0, 267008 +; RV32-ILP32FD-NEXT: sw a0, 0(sp) +; RV32-ILP32FD-NEXT: lui t1, 267264 +; RV32-ILP32FD-NEXT: fmv.w.x fa0, a1 ; RV32-ILP32FD-NEXT: li a0, 1 -; RV32-ILP32FD-NEXT: fmv.w.x fa2, a3 +; RV32-ILP32FD-NEXT: fmv.w.x fa1, a2 ; RV32-ILP32FD-NEXT: li a2, 3 -; RV32-ILP32FD-NEXT: fmv.w.x fa3, a4 +; RV32-ILP32FD-NEXT: fmv.w.x fa2, a3 ; RV32-ILP32FD-NEXT: li a4, 5 -; RV32-ILP32FD-NEXT: fmv.w.x fa4, a5 -; RV32-ILP32FD-NEXT: fmv.w.x fa5, a6 -; RV32-ILP32FD-NEXT: fmv.w.x fa6, a7 -; RV32-ILP32FD-NEXT: fmv.w.x fa7, t0 +; RV32-ILP32FD-NEXT: fmv.w.x fa3, a5 +; RV32-ILP32FD-NEXT: fmv.w.x fa4, a6 +; RV32-ILP32FD-NEXT: fmv.w.x fa5, a7 +; RV32-ILP32FD-NEXT: fmv.w.x fa6, t0 +; RV32-ILP32FD-NEXT: fmv.w.x fa7, t1 ; RV32-ILP32FD-NEXT: li a6, 7 -; RV32-ILP32FD-NEXT: sw a1, 0(sp) ; RV32-ILP32FD-NEXT: li a1, 0 ; RV32-ILP32FD-NEXT: li a3, 0 ; RV32-ILP32FD-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll index 746b71a08a30b..219fca5e48c52 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64-lp64f-lp64d-common.ll @@ -89,9 +89,9 @@ define i32 @caller_many_scalars() nounwind { ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a5, 5 ; RV64I-NEXT: li a6, 6 -; RV64I-NEXT: li a7, 7 ; RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: sd a4, 8(sp) +; RV64I-NEXT: li a7, 7 ; RV64I-NEXT: li a4, 0 ; RV64I-NEXT: call callee_many_scalars ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload @@ -110,17 +110,17 @@ define i64 @callee_large_scalars(i256 %a, i256 %b) nounwind { ; RV64I-NEXT: ld a3, 8(a1) ; RV64I-NEXT: ld a4, 16(a1) ; RV64I-NEXT: ld a1, 24(a1) -; RV64I-NEXT: ld a5, 24(a0) +; RV64I-NEXT: ld a5, 0(a0) ; RV64I-NEXT: ld a6, 8(a0) ; RV64I-NEXT: ld a7, 16(a0) -; RV64I-NEXT: ld a0, 0(a0) -; RV64I-NEXT: xor a1, a5, a1 -; RV64I-NEXT: xor a3, a6, a3 -; RV64I-NEXT: xor a4, a7, a4 -; RV64I-NEXT: xor a0, a0, a2 -; RV64I-NEXT: or a1, a3, a1 -; RV64I-NEXT: or a0, a0, a4 -; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: ld a0, 24(a0) +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: xor a1, a6, a3 +; RV64I-NEXT: xor a3, a7, a4 +; RV64I-NEXT: xor a2, a5, a2 +; RV64I-NEXT: or a0, a1, a0 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: or a0, a2, a0 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret %1 = icmp eq i256 %a, %b @@ -133,18 +133,18 @@ define i64 @caller_large_scalars() nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -80 ; RV64I-NEXT: sd ra, 72(sp) # 8-byte Folded Spill -; RV64I-NEXT: li a2, 2 -; RV64I-NEXT: li a3, 1 +; RV64I-NEXT: li a1, 2 +; RV64I-NEXT: li a2, 1 ; RV64I-NEXT: addi a0, sp, 32 -; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: sd a2, 0(sp) +; RV64I-NEXT: sd a1, 0(sp) ; RV64I-NEXT: sd zero, 8(sp) ; 
RV64I-NEXT: sd zero, 16(sp) ; RV64I-NEXT: sd zero, 24(sp) -; RV64I-NEXT: sd a3, 32(sp) +; RV64I-NEXT: sd a2, 32(sp) ; RV64I-NEXT: sd zero, 40(sp) ; RV64I-NEXT: sd zero, 48(sp) ; RV64I-NEXT: sd zero, 56(sp) +; RV64I-NEXT: mv a1, sp ; RV64I-NEXT: call callee_large_scalars ; RV64I-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 80 @@ -165,17 +165,17 @@ define i64 @callee_large_scalars_exhausted_regs(i64 %a, i64 %b, i64 %c, i64 %d, ; RV64I-NEXT: ld a2, 8(a7) ; RV64I-NEXT: ld a3, 16(a7) ; RV64I-NEXT: ld a4, 24(a7) -; RV64I-NEXT: ld a5, 24(a0) +; RV64I-NEXT: ld a5, 0(a0) ; RV64I-NEXT: ld a6, 8(a0) ; RV64I-NEXT: ld a7, 16(a0) -; RV64I-NEXT: ld a0, 0(a0) -; RV64I-NEXT: xor a4, a4, a5 +; RV64I-NEXT: ld a0, 24(a0) +; RV64I-NEXT: xor a0, a4, a0 ; RV64I-NEXT: xor a2, a2, a6 ; RV64I-NEXT: xor a3, a3, a7 -; RV64I-NEXT: xor a0, a1, a0 -; RV64I-NEXT: or a2, a2, a4 -; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: or a0, a0, a2 +; RV64I-NEXT: xor a1, a1, a5 +; RV64I-NEXT: or a0, a2, a0 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret %1 = icmp eq i256 %h, %j @@ -188,28 +188,28 @@ define i64 @caller_large_scalars_exhausted_regs() nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -96 ; RV64I-NEXT: sd ra, 88(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi a7, sp, 16 -; RV64I-NEXT: li t0, 9 -; RV64I-NEXT: li t1, 10 -; RV64I-NEXT: li t2, 8 +; RV64I-NEXT: addi a6, sp, 16 +; RV64I-NEXT: li a7, 9 +; RV64I-NEXT: li t0, 10 +; RV64I-NEXT: li t1, 8 ; RV64I-NEXT: li a0, 1 ; RV64I-NEXT: li a1, 2 ; RV64I-NEXT: li a2, 3 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 5 ; RV64I-NEXT: li a5, 6 +; RV64I-NEXT: sd a7, 0(sp) +; RV64I-NEXT: sd a6, 8(sp) ; RV64I-NEXT: li a6, 7 -; RV64I-NEXT: sd t0, 0(sp) -; RV64I-NEXT: sd a7, 8(sp) -; RV64I-NEXT: addi a7, sp, 48 -; RV64I-NEXT: sd t1, 16(sp) +; RV64I-NEXT: sd t0, 16(sp) ; RV64I-NEXT: sd zero, 24(sp) ; RV64I-NEXT: sd zero, 32(sp) ; RV64I-NEXT: sd zero, 40(sp) -; RV64I-NEXT: sd t2, 48(sp) +; RV64I-NEXT: sd t1, 48(sp) ; RV64I-NEXT: sd zero, 56(sp) ; RV64I-NEXT: sd zero, 64(sp) ; RV64I-NEXT: sd zero, 72(sp) +; RV64I-NEXT: addi a7, sp, 48 ; RV64I-NEXT: call callee_large_scalars_exhausted_regs ; RV64I-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 96 @@ -329,13 +329,13 @@ define i64 @callee_aligned_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i128 %f ; RV64I-LABEL: callee_aligned_stack: ; RV64I: # %bb.0: ; RV64I-NEXT: ld a0, 32(sp) -; RV64I-NEXT: ld a1, 0(sp) -; RV64I-NEXT: ld a2, 16(sp) +; RV64I-NEXT: ld a1, 16(sp) +; RV64I-NEXT: ld a2, 0(sp) ; RV64I-NEXT: ld a3, 40(sp) ; RV64I-NEXT: add a5, a5, a7 -; RV64I-NEXT: add a1, a5, a1 -; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: add a2, a5, a2 ; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: add a0, a0, a3 ; RV64I-NEXT: ret %f_trunc = trunc i128 %f to i64 @@ -356,24 +356,24 @@ define void @caller_aligned_stack() nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -64 ; RV64I-NEXT: sd ra, 56(sp) # 8-byte Folded Spill -; RV64I-NEXT: li a6, 12 -; RV64I-NEXT: li a7, 11 -; RV64I-NEXT: li t0, 10 -; RV64I-NEXT: li t1, 9 -; RV64I-NEXT: li t2, 8 +; RV64I-NEXT: li a5, 12 +; RV64I-NEXT: li a6, 11 +; RV64I-NEXT: li a7, 10 +; RV64I-NEXT: li t0, 9 +; RV64I-NEXT: li t1, 8 ; RV64I-NEXT: li a0, 1 ; RV64I-NEXT: li a1, 2 ; RV64I-NEXT: li a2, 3 ; RV64I-NEXT: li a3, 4 ; RV64I-NEXT: li a4, 5 +; RV64I-NEXT: sd a6, 40(sp) +; RV64I-NEXT: sd a5, 48(sp) ; RV64I-NEXT: li a5, 6 -; RV64I-NEXT: sd a7, 40(sp) -; RV64I-NEXT: sd a6, 48(sp) -; RV64I-NEXT: li 
a7, 7 -; RV64I-NEXT: sd t2, 0(sp) -; RV64I-NEXT: sd t1, 16(sp) +; RV64I-NEXT: sd t1, 0(sp) +; RV64I-NEXT: sd t0, 16(sp) ; RV64I-NEXT: sd zero, 24(sp) -; RV64I-NEXT: sd t0, 32(sp) +; RV64I-NEXT: sd a7, 32(sp) +; RV64I-NEXT: li a7, 7 ; RV64I-NEXT: li a6, 0 ; RV64I-NEXT: call callee_aligned_stack ; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64.ll index c2db8fe5248fd..d43f43ceffec3 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64.ll @@ -112,8 +112,8 @@ define i64 @caller_float_on_stack() nounwind { ; RV64I-FPELIM-NEXT: li a0, 1 ; RV64I-FPELIM-NEXT: li a2, 2 ; RV64I-FPELIM-NEXT: li a4, 3 -; RV64I-FPELIM-NEXT: li a6, 4 ; RV64I-FPELIM-NEXT: sd a1, 0(sp) +; RV64I-FPELIM-NEXT: li a6, 4 ; RV64I-FPELIM-NEXT: li a1, 0 ; RV64I-FPELIM-NEXT: li a3, 0 ; RV64I-FPELIM-NEXT: li a5, 0 @@ -133,8 +133,8 @@ define i64 @caller_float_on_stack() nounwind { ; RV64I-WITHFP-NEXT: li a0, 1 ; RV64I-WITHFP-NEXT: li a2, 2 ; RV64I-WITHFP-NEXT: li a4, 3 -; RV64I-WITHFP-NEXT: li a6, 4 ; RV64I-WITHFP-NEXT: sd a1, 0(sp) +; RV64I-WITHFP-NEXT: li a6, 4 ; RV64I-WITHFP-NEXT: li a1, 0 ; RV64I-WITHFP-NEXT: li a3, 0 ; RV64I-WITHFP-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll b/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll index 985135a086e24..cc10e900faa0b 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-lp64e.ll @@ -118,10 +118,10 @@ define i64 @caller_float_on_stack() nounwind { ; RV64I-LP64E-FPELIM-NEXT: li a3, 4 ; RV64I-LP64E-FPELIM-NEXT: li a0, 1 ; RV64I-LP64E-FPELIM-NEXT: li a2, 2 -; RV64I-LP64E-FPELIM-NEXT: li a4, 3 ; RV64I-LP64E-FPELIM-NEXT: sd a3, 0(sp) ; RV64I-LP64E-FPELIM-NEXT: sd zero, 8(sp) ; RV64I-LP64E-FPELIM-NEXT: sd a1, 16(sp) +; RV64I-LP64E-FPELIM-NEXT: li a4, 3 ; RV64I-LP64E-FPELIM-NEXT: li a1, 0 ; RV64I-LP64E-FPELIM-NEXT: li a3, 0 ; RV64I-LP64E-FPELIM-NEXT: li a5, 0 @@ -143,10 +143,10 @@ define i64 @caller_float_on_stack() nounwind { ; RV64I-LP64E-WITHFP-NEXT: li a3, 4 ; RV64I-LP64E-WITHFP-NEXT: li a0, 1 ; RV64I-LP64E-WITHFP-NEXT: li a2, 2 -; RV64I-LP64E-WITHFP-NEXT: li a4, 3 ; RV64I-LP64E-WITHFP-NEXT: sd a3, 0(sp) ; RV64I-LP64E-WITHFP-NEXT: sd zero, 8(sp) ; RV64I-LP64E-WITHFP-NEXT: sd a1, 16(sp) +; RV64I-LP64E-WITHFP-NEXT: li a4, 3 ; RV64I-LP64E-WITHFP-NEXT: li a1, 0 ; RV64I-LP64E-WITHFP-NEXT: li a3, 0 ; RV64I-LP64E-WITHFP-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll index eaba1acffa054..284de1988d37e 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32.ll @@ -37,9 +37,9 @@ define float @caller_onstack_f32_noop(float %a) nounwind { ; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: li a2, 2 ; RV32IF-NEXT: li a4, 3 -; RV32IF-NEXT: li a6, 4 ; RV32IF-NEXT: sw a3, 0(sp) ; RV32IF-NEXT: sw a1, 4(sp) +; RV32IF-NEXT: li a6, 4 ; RV32IF-NEXT: li a1, 0 ; RV32IF-NEXT: li a3, 0 ; RV32IF-NEXT: li a5, 0 @@ -61,12 +61,12 @@ define float @caller_onstack_f32_fadd(float %a, float %b) nounwind { ; RV32IF-NEXT: fmv.w.x fa4, a0 ; RV32IF-NEXT: fadd.s fa3, fa4, fa5 ; RV32IF-NEXT: fsub.s fa5, fa5, fa4 +; RV32IF-NEXT: fsw fa3, 0(sp) +; RV32IF-NEXT: fsw fa5, 4(sp) ; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: li a2, 2 ; RV32IF-NEXT: li a4, 3 ; RV32IF-NEXT: li a6, 4 -; RV32IF-NEXT: fsw fa3, 0(sp) -; RV32IF-NEXT: fsw fa5, 4(sp) ; RV32IF-NEXT: li a1, 0 ; RV32IF-NEXT: 
li a3, 0 ; RV32IF-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll index 63d4ea5fee331..6bc0e773f0aff 100644 --- a/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll +++ b/llvm/test/CodeGen/RISCV/calling-conv-rv32f-ilp32e.ll @@ -34,14 +34,14 @@ define float @caller_onstack_f32_noop(float %a) nounwind { ; RV32IF-ILP32E-NEXT: sw ra, 16(sp) # 4-byte Folded Spill ; RV32IF-ILP32E-NEXT: mv a1, a0 ; RV32IF-ILP32E-NEXT: lui a3, 264704 -; RV32IF-ILP32E-NEXT: li a5, 4 +; RV32IF-ILP32E-NEXT: li a4, 4 ; RV32IF-ILP32E-NEXT: li a0, 1 ; RV32IF-ILP32E-NEXT: li a2, 2 -; RV32IF-ILP32E-NEXT: li a4, 3 -; RV32IF-ILP32E-NEXT: sw a5, 0(sp) +; RV32IF-ILP32E-NEXT: sw a4, 0(sp) ; RV32IF-ILP32E-NEXT: sw zero, 4(sp) ; RV32IF-ILP32E-NEXT: sw a3, 8(sp) ; RV32IF-ILP32E-NEXT: sw a1, 12(sp) +; RV32IF-ILP32E-NEXT: li a4, 3 ; RV32IF-ILP32E-NEXT: li a1, 0 ; RV32IF-ILP32E-NEXT: li a3, 0 ; RV32IF-ILP32E-NEXT: li a5, 0 @@ -65,11 +65,11 @@ define float @caller_onstack_f32_fadd(float %a, float %b) nounwind { ; RV32IF-ILP32E-NEXT: li a1, 4 ; RV32IF-ILP32E-NEXT: li a0, 1 ; RV32IF-ILP32E-NEXT: li a2, 2 -; RV32IF-ILP32E-NEXT: li a4, 3 ; RV32IF-ILP32E-NEXT: sw a1, 0(sp) ; RV32IF-ILP32E-NEXT: sw zero, 4(sp) ; RV32IF-ILP32E-NEXT: fsw fa3, 8(sp) ; RV32IF-ILP32E-NEXT: fsw fa5, 12(sp) +; RV32IF-ILP32E-NEXT: li a4, 3 ; RV32IF-ILP32E-NEXT: li a1, 0 ; RV32IF-ILP32E-NEXT: li a3, 0 ; RV32IF-ILP32E-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/calls.ll b/llvm/test/CodeGen/RISCV/calls.ll index cf0e625f3c6c7..6aef8b18f5b77 100644 --- a/llvm/test/CodeGen/RISCV/calls.ll +++ b/llvm/test/CodeGen/RISCV/calls.ll @@ -654,11 +654,11 @@ define i32 @test_call_external_many_args(i32 %a) nounwind { ; RV64I-LARGE-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-LARGE-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-LARGE-NEXT: mv s0, a0 +; RV64I-LARGE-NEXT: sd a0, 0(sp) +; RV64I-LARGE-NEXT: sd a0, 8(sp) ; RV64I-LARGE-NEXT: .Lpcrel_hi4: ; RV64I-LARGE-NEXT: auipc a0, %pcrel_hi(.LCPI8_0) ; RV64I-LARGE-NEXT: ld t1, %pcrel_lo(.Lpcrel_hi4)(a0) -; RV64I-LARGE-NEXT: sd s0, 0(sp) -; RV64I-LARGE-NEXT: sd s0, 8(sp) ; RV64I-LARGE-NEXT: mv a0, s0 ; RV64I-LARGE-NEXT: mv a1, s0 ; RV64I-LARGE-NEXT: mv a2, s0 @@ -681,11 +681,11 @@ define i32 @test_call_external_many_args(i32 %a) nounwind { ; RV64I-LARGE-ZICFILP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64I-LARGE-ZICFILP-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64I-LARGE-ZICFILP-NEXT: mv s0, a0 +; RV64I-LARGE-ZICFILP-NEXT: sd a0, 0(sp) +; RV64I-LARGE-ZICFILP-NEXT: sd a0, 8(sp) ; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi4: ; RV64I-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI8_0) ; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi4)(a0) -; RV64I-LARGE-ZICFILP-NEXT: sd s0, 0(sp) -; RV64I-LARGE-ZICFILP-NEXT: sd s0, 8(sp) ; RV64I-LARGE-ZICFILP-NEXT: mv a0, s0 ; RV64I-LARGE-ZICFILP-NEXT: mv a1, s0 ; RV64I-LARGE-ZICFILP-NEXT: mv a2, s0 @@ -823,11 +823,11 @@ define i32 @test_call_defined_many_args(i32 %a) nounwind { ; RV64I-LARGE: # %bb.0: ; RV64I-LARGE-NEXT: addi sp, sp, -32 ; RV64I-LARGE-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-LARGE-NEXT: sd a0, 0(sp) +; RV64I-LARGE-NEXT: sd a0, 8(sp) ; RV64I-LARGE-NEXT: .Lpcrel_hi5: ; RV64I-LARGE-NEXT: auipc a1, %pcrel_hi(.LCPI10_0) ; RV64I-LARGE-NEXT: ld t1, %pcrel_lo(.Lpcrel_hi5)(a1) -; RV64I-LARGE-NEXT: sd a0, 0(sp) -; RV64I-LARGE-NEXT: sd a0, 8(sp) ; RV64I-LARGE-NEXT: mv a1, a0 ; RV64I-LARGE-NEXT: mv a2, a0 ; RV64I-LARGE-NEXT: mv a3, a0 @@ -845,11 +845,11 @@ define i32 
@test_call_defined_many_args(i32 %a) nounwind { ; RV64I-LARGE-ZICFILP-NEXT: lpad 0 ; RV64I-LARGE-ZICFILP-NEXT: addi sp, sp, -32 ; RV64I-LARGE-ZICFILP-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64I-LARGE-ZICFILP-NEXT: sd a0, 0(sp) +; RV64I-LARGE-ZICFILP-NEXT: sd a0, 8(sp) ; RV64I-LARGE-ZICFILP-NEXT: .Lpcrel_hi5: ; RV64I-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI10_0) ; RV64I-LARGE-ZICFILP-NEXT: ld t2, %pcrel_lo(.Lpcrel_hi5)(a1) -; RV64I-LARGE-ZICFILP-NEXT: sd a0, 0(sp) -; RV64I-LARGE-ZICFILP-NEXT: sd a0, 8(sp) ; RV64I-LARGE-ZICFILP-NEXT: mv a1, a0 ; RV64I-LARGE-ZICFILP-NEXT: mv a2, a0 ; RV64I-LARGE-ZICFILP-NEXT: mv a3, a0 diff --git a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll index 4831f0b24c7fe..ab8460d944b33 100644 --- a/llvm/test/CodeGen/RISCV/codemodel-lowering.ll +++ b/llvm/test/CodeGen/RISCV/codemodel-lowering.ll @@ -119,9 +119,9 @@ define signext i32 @lower_blockaddress_displ(i32 signext %w) nounwind { ; RV32I-SMALL-NEXT: addi sp, sp, -16 ; RV32I-SMALL-NEXT: lui a1, %hi(.Ltmp0) ; RV32I-SMALL-NEXT: addi a1, a1, %lo(.Ltmp0) -; RV32I-SMALL-NEXT: li a2, 101 ; RV32I-SMALL-NEXT: sw a1, 8(sp) -; RV32I-SMALL-NEXT: blt a0, a2, .LBB2_3 +; RV32I-SMALL-NEXT: li a1, 101 +; RV32I-SMALL-NEXT: blt a0, a1, .LBB2_3 ; RV32I-SMALL-NEXT: # %bb.1: # %if.then ; RV32I-SMALL-NEXT: lw a0, 8(sp) ; RV32I-SMALL-NEXT: jr a0 @@ -141,9 +141,9 @@ define signext i32 @lower_blockaddress_displ(i32 signext %w) nounwind { ; RV32I-MEDIUM-NEXT: .Lpcrel_hi2: ; RV32I-MEDIUM-NEXT: auipc a1, %pcrel_hi(.Ltmp0) ; RV32I-MEDIUM-NEXT: addi a1, a1, %pcrel_lo(.Lpcrel_hi2) -; RV32I-MEDIUM-NEXT: li a2, 101 ; RV32I-MEDIUM-NEXT: sw a1, 8(sp) -; RV32I-MEDIUM-NEXT: blt a0, a2, .LBB2_3 +; RV32I-MEDIUM-NEXT: li a1, 101 +; RV32I-MEDIUM-NEXT: blt a0, a1, .LBB2_3 ; RV32I-MEDIUM-NEXT: # %bb.1: # %if.then ; RV32I-MEDIUM-NEXT: lw a0, 8(sp) ; RV32I-MEDIUM-NEXT: jr a0 @@ -162,9 +162,9 @@ define signext i32 @lower_blockaddress_displ(i32 signext %w) nounwind { ; RV64I-SMALL-NEXT: addi sp, sp, -16 ; RV64I-SMALL-NEXT: lui a1, %hi(.Ltmp0) ; RV64I-SMALL-NEXT: addi a1, a1, %lo(.Ltmp0) -; RV64I-SMALL-NEXT: li a2, 101 ; RV64I-SMALL-NEXT: sd a1, 8(sp) -; RV64I-SMALL-NEXT: blt a0, a2, .LBB2_3 +; RV64I-SMALL-NEXT: li a1, 101 +; RV64I-SMALL-NEXT: blt a0, a1, .LBB2_3 ; RV64I-SMALL-NEXT: # %bb.1: # %if.then ; RV64I-SMALL-NEXT: ld a0, 8(sp) ; RV64I-SMALL-NEXT: jr a0 @@ -184,9 +184,9 @@ define signext i32 @lower_blockaddress_displ(i32 signext %w) nounwind { ; RV64I-MEDIUM-NEXT: .Lpcrel_hi2: ; RV64I-MEDIUM-NEXT: auipc a1, %pcrel_hi(.Ltmp0) ; RV64I-MEDIUM-NEXT: addi a1, a1, %pcrel_lo(.Lpcrel_hi2) -; RV64I-MEDIUM-NEXT: li a2, 101 ; RV64I-MEDIUM-NEXT: sd a1, 8(sp) -; RV64I-MEDIUM-NEXT: blt a0, a2, .LBB2_3 +; RV64I-MEDIUM-NEXT: li a1, 101 +; RV64I-MEDIUM-NEXT: blt a0, a1, .LBB2_3 ; RV64I-MEDIUM-NEXT: # %bb.1: # %if.then ; RV64I-MEDIUM-NEXT: ld a0, 8(sp) ; RV64I-MEDIUM-NEXT: jr a0 @@ -206,9 +206,9 @@ define signext i32 @lower_blockaddress_displ(i32 signext %w) nounwind { ; RV64I-LARGE-NEXT: .Lpcrel_hi2: ; RV64I-LARGE-NEXT: auipc a1, %pcrel_hi(.Ltmp0) ; RV64I-LARGE-NEXT: addi a1, a1, %pcrel_lo(.Lpcrel_hi2) -; RV64I-LARGE-NEXT: li a2, 101 ; RV64I-LARGE-NEXT: sd a1, 8(sp) -; RV64I-LARGE-NEXT: blt a0, a2, .LBB2_3 +; RV64I-LARGE-NEXT: li a1, 101 +; RV64I-LARGE-NEXT: blt a0, a1, .LBB2_3 ; RV64I-LARGE-NEXT: # %bb.1: # %if.then ; RV64I-LARGE-NEXT: ld a0, 8(sp) ; RV64I-LARGE-NEXT: jr a0 diff --git a/llvm/test/CodeGen/RISCV/condbinops.ll b/llvm/test/CodeGen/RISCV/condbinops.ll index dc81c13bfb6a3..e898661665e99 100644 --- 
a/llvm/test/CodeGen/RISCV/condbinops.ll +++ b/llvm/test/CodeGen/RISCV/condbinops.ll @@ -411,8 +411,8 @@ define i64 @shl64(i64 %x, i64 %y, i1 %c) { ; RV32I-NEXT: slli a4, a4, 31 ; RV32I-NEXT: srai a4, a4, 31 ; RV32I-NEXT: and a4, a4, a2 -; RV32I-NEXT: addi a3, a4, -32 ; RV32I-NEXT: sll a2, a0, a4 +; RV32I-NEXT: addi a3, a4, -32 ; RV32I-NEXT: bltz a3, .LBB8_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a1, a2 @@ -486,8 +486,8 @@ define i64 @ashr64(i64 %x, i64 %y, i1 %c) { ; RV32I-NEXT: slli a4, a4, 31 ; RV32I-NEXT: srai a4, a4, 31 ; RV32I-NEXT: and a2, a4, a2 -; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: sra a0, a1, a2 +; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: bltz a4, .LBB9_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srai a1, a1, 31 @@ -496,10 +496,9 @@ define i64 @ashr64(i64 %x, i64 %y, i1 %c) { ; RV32I-NEXT: srl a3, a3, a2 ; RV32I-NEXT: not a2, a2 ; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: sll a1, a1, a2 -; RV32I-NEXT: or a3, a3, a1 +; RV32I-NEXT: sll a2, a1, a2 ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: or a0, a3, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ashr64: @@ -562,8 +561,8 @@ define i64 @lshr64(i64 %x, i64 %y, i1 %c) { ; RV32I-NEXT: slli a4, a4, 31 ; RV32I-NEXT: srai a4, a4, 31 ; RV32I-NEXT: and a4, a4, a2 -; RV32I-NEXT: addi a3, a4, -32 ; RV32I-NEXT: srl a2, a1, a4 +; RV32I-NEXT: addi a3, a4, -32 ; RV32I-NEXT: bltz a3, .LBB10_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a0, a2 diff --git a/llvm/test/CodeGen/RISCV/condops.ll b/llvm/test/CodeGen/RISCV/condops.ll index 6c2ba493ffcd5..bd9e543e955d5 100644 --- a/llvm/test/CodeGen/RISCV/condops.ll +++ b/llvm/test/CodeGen/RISCV/condops.ll @@ -1348,13 +1348,13 @@ define i64 @seteq(i64 %a, i64 %b, i64 %rs1, i64 %rs2) { ; RV32I: # %bb.0: ; RV32I-NEXT: xor a1, a1, a3 ; RV32I-NEXT: xor a0, a0, a2 -; RV32I-NEXT: or a1, a0, a1 -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: beqz a1, .LBB23_2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: beqz a0, .LBB23_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a6 +; RV32I-NEXT: mv a4, a6 ; RV32I-NEXT: mv a5, a7 ; RV32I-NEXT: .LBB23_2: +; RV32I-NEXT: mv a0, a4 ; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: ret ; @@ -1425,13 +1425,13 @@ define i64 @setne(i64 %a, i64 %b, i64 %rs1, i64 %rs2) { ; RV32I: # %bb.0: ; RV32I-NEXT: xor a1, a1, a3 ; RV32I-NEXT: xor a0, a0, a2 -; RV32I-NEXT: or a1, a0, a1 -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: bnez a1, .LBB24_2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: bnez a0, .LBB24_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a6 +; RV32I-NEXT: mv a4, a6 ; RV32I-NEXT: mv a5, a7 ; RV32I-NEXT: .LBB24_2: +; RV32I-NEXT: mv a0, a4 ; RV32I-NEXT: mv a1, a5 ; RV32I-NEXT: ret ; @@ -2196,13 +2196,13 @@ define i64 @setule(i64 %a, i64 %b, i64 %rs1, i64 %rs2) { define i64 @seteq_zero(i64 %a, i64 %rs1, i64 %rs2) { ; RV32I-LABEL: seteq_zero: ; RV32I: # %bb.0: -; RV32I-NEXT: or a1, a0, a1 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: beqz a1, .LBB33_2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: beqz a0, .LBB33_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: .LBB33_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: ret ; @@ -2264,13 +2264,13 @@ define i64 @seteq_zero(i64 %a, i64 %rs1, i64 %rs2) { define i64 @setne_zero(i64 %a, i64 %rs1, i64 %rs2) { ; RV32I-LABEL: setne_zero: ; RV32I: # %bb.0: -; RV32I-NEXT: or a1, a0, a1 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: bnez a1, .LBB34_2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: bnez a0, .LBB34_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: mv a3, 
a5 ; RV32I-NEXT: .LBB34_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: ret ; @@ -2333,13 +2333,13 @@ define i64 @seteq_constant(i64 %a, i64 %rs1, i64 %rs2) { ; RV32I-LABEL: seteq_constant: ; RV32I: # %bb.0: ; RV32I-NEXT: xori a0, a0, 123 -; RV32I-NEXT: or a1, a0, a1 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: beqz a1, .LBB35_2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: beqz a0, .LBB35_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: .LBB35_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: ret ; @@ -2408,13 +2408,13 @@ define i64 @setne_constant(i64 %a, i64 %rs1, i64 %rs2) { ; RV32I-LABEL: setne_constant: ; RV32I: # %bb.0: ; RV32I-NEXT: xori a0, a0, 456 -; RV32I-NEXT: or a1, a0, a1 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: bnez a1, .LBB36_2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: bnez a0, .LBB36_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: .LBB36_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: ret ; @@ -2483,13 +2483,13 @@ define i64 @seteq_2048(i64 %a, i64 %rs1, i64 %rs2) { ; RV32I-LABEL: seteq_2048: ; RV32I: # %bb.0: ; RV32I-NEXT: binvi a0, a0, 11 -; RV32I-NEXT: or a1, a0, a1 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: beqz a1, .LBB37_2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: beqz a0, .LBB37_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: .LBB37_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: ret ; @@ -2559,13 +2559,13 @@ define i64 @seteq_neg2048(i64 %a, i64 %rs1, i64 %rs2) { ; RV32I: # %bb.0: ; RV32I-NEXT: not a1, a1 ; RV32I-NEXT: xori a0, a0, -2048 -; RV32I-NEXT: or a1, a0, a1 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: beqz a1, .LBB38_2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: beqz a0, .LBB38_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: .LBB38_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: ret ; @@ -2637,13 +2637,13 @@ define i64 @setne_neg2048(i64 %a, i64 %rs1, i64 %rs2) { ; RV32I: # %bb.0: ; RV32I-NEXT: not a1, a1 ; RV32I-NEXT: xori a0, a0, -2048 -; RV32I-NEXT: or a1, a0, a1 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: bnez a1, .LBB39_2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: bnez a0, .LBB39_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: mv a3, a5 ; RV32I-NEXT: .LBB39_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/copysign-casts.ll b/llvm/test/CodeGen/RISCV/copysign-casts.ll index 53de36f1699a9..5400ec6d005ef 100644 --- a/llvm/test/CodeGen/RISCV/copysign-casts.ll +++ b/llvm/test/CodeGen/RISCV/copysign-casts.ll @@ -702,17 +702,17 @@ define half @fold_demote_h_d(half %a, double %b) nounwind { ; RV32IFD-LABEL: fold_demote_h_d: ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -16 +; RV32IFD-NEXT: fmv.x.w a0, fa0 ; RV32IFD-NEXT: fsd fa1, 8(sp) -; RV32IFD-NEXT: lw a0, 12(sp) -; RV32IFD-NEXT: fmv.x.w a1, fa0 -; RV32IFD-NEXT: lui a2, 524288 -; RV32IFD-NEXT: and a0, a0, a2 +; RV32IFD-NEXT: lui a1, 524288 +; RV32IFD-NEXT: lw a2, 12(sp) +; RV32IFD-NEXT: and a1, a2, a1 ; RV32IFD-NEXT: lui a2, 1048560 -; RV32IFD-NEXT: slli a1, a1, 17 -; RV32IFD-NEXT: srli a1, a1, 17 -; RV32IFD-NEXT: srli a0, a0, 16 -; RV32IFD-NEXT: or a1, a1, a2 -; RV32IFD-NEXT: or a0, a1, a0 +; RV32IFD-NEXT: slli a0, a0, 17 +; RV32IFD-NEXT: srli a0, a0, 17 +; RV32IFD-NEXT: srli 
a1, a1, 16 +; RV32IFD-NEXT: or a0, a0, a2 +; RV32IFD-NEXT: or a0, a0, a1 ; RV32IFD-NEXT: fmv.w.x fa0, a0 ; RV32IFD-NEXT: addi sp, sp, 16 ; RV32IFD-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll index da97ac0d74237..a098de49f8410 100644 --- a/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/ctlz-cttz-ctpop.ll @@ -475,10 +475,10 @@ define i64 @test_cttz_i64(i64 %a) nounwind { ; RV64M: # %bb.0: ; RV64M-NEXT: beqz a0, .LBB3_2 ; RV64M-NEXT: # %bb.1: # %cond.false +; RV64M-NEXT: neg a1, a0 +; RV64M-NEXT: and a0, a0, a1 ; RV64M-NEXT: lui a1, %hi(.LCPI3_0) ; RV64M-NEXT: ld a1, %lo(.LCPI3_0)(a1) -; RV64M-NEXT: neg a2, a0 -; RV64M-NEXT: and a0, a0, a2 ; RV64M-NEXT: mul a0, a0, a1 ; RV64M-NEXT: srli a0, a0, 58 ; RV64M-NEXT: lui a1, %hi(.LCPI3_1) @@ -889,10 +889,10 @@ define i64 @test_cttz_i64_zero_undef(i64 %a) nounwind { ; ; RV64M-LABEL: test_cttz_i64_zero_undef: ; RV64M: # %bb.0: +; RV64M-NEXT: neg a1, a0 +; RV64M-NEXT: and a0, a0, a1 ; RV64M-NEXT: lui a1, %hi(.LCPI7_0) ; RV64M-NEXT: ld a1, %lo(.LCPI7_0)(a1) -; RV64M-NEXT: neg a2, a0 -; RV64M-NEXT: and a0, a0, a2 ; RV64M-NEXT: mul a0, a0, a1 ; RV64M-NEXT: srli a0, a0, 58 ; RV64M-NEXT: lui a1, %hi(.LCPI7_1) diff --git a/llvm/test/CodeGen/RISCV/double-calling-conv.ll b/llvm/test/CodeGen/RISCV/double-calling-conv.ll index 798eac64e9fc2..51f75c10462d0 100644 --- a/llvm/test/CodeGen/RISCV/double-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/double-calling-conv.ll @@ -93,8 +93,8 @@ define double @callee_double_split_reg_stack(i32 %a, i64 %b, i64 %c, double %d, ; RV32IZFINXZDINX-LABEL: callee_double_split_reg_stack: ; RV32IZFINXZDINX: # %bb.0: ; RV32IZFINXZDINX-NEXT: mv a0, a7 -; RV32IZFINXZDINX-NEXT: lw a1, 0(sp) ; RV32IZFINXZDINX-NEXT: mv a3, a6 +; RV32IZFINXZDINX-NEXT: lw a1, 0(sp) ; RV32IZFINXZDINX-NEXT: mv a2, a5 ; RV32IZFINXZDINX-NEXT: fadd.d a0, a2, a0 ; RV32IZFINXZDINX-NEXT: ret @@ -115,8 +115,8 @@ define double @caller_double_split_reg_stack() nounwind { ; RV32IFD-NEXT: addi a2, a2, 327 ; RV32IFD-NEXT: addi a6, a3, 327 ; RV32IFD-NEXT: addi a5, a4, -1311 -; RV32IFD-NEXT: li a3, 3 ; RV32IFD-NEXT: sw a2, 0(sp) +; RV32IFD-NEXT: li a3, 3 ; RV32IFD-NEXT: li a2, 0 ; RV32IFD-NEXT: li a4, 0 ; RV32IFD-NEXT: mv a7, a5 @@ -137,8 +137,8 @@ define double @caller_double_split_reg_stack() nounwind { ; RV32IZFINXZDINX-NEXT: addi a2, a2, 327 ; RV32IZFINXZDINX-NEXT: addi a6, a3, 327 ; RV32IZFINXZDINX-NEXT: addi a5, a4, -1311 -; RV32IZFINXZDINX-NEXT: li a3, 3 ; RV32IZFINXZDINX-NEXT: sw a2, 0(sp) +; RV32IZFINXZDINX-NEXT: li a3, 3 ; RV32IZFINXZDINX-NEXT: li a2, 0 ; RV32IZFINXZDINX-NEXT: li a4, 0 ; RV32IZFINXZDINX-NEXT: mv a7, a5 @@ -186,7 +186,6 @@ define double @caller_double_stack() nounwind { ; RV32IFD-NEXT: li a0, 1 ; RV32IFD-NEXT: li a2, 2 ; RV32IFD-NEXT: li a4, 3 -; RV32IFD-NEXT: li a6, 4 ; RV32IFD-NEXT: addi a1, a1, 327 ; RV32IFD-NEXT: addi a3, a3, -1311 ; RV32IFD-NEXT: addi a5, a5, 327 @@ -194,6 +193,7 @@ define double @caller_double_stack() nounwind { ; RV32IFD-NEXT: sw a1, 4(sp) ; RV32IFD-NEXT: sw a3, 8(sp) ; RV32IFD-NEXT: sw a5, 12(sp) +; RV32IFD-NEXT: li a6, 4 ; RV32IFD-NEXT: li a1, 0 ; RV32IFD-NEXT: li a3, 0 ; RV32IFD-NEXT: li a5, 0 @@ -213,7 +213,6 @@ define double @caller_double_stack() nounwind { ; RV32IZFINXZDINX-NEXT: li a0, 1 ; RV32IZFINXZDINX-NEXT: li a2, 2 ; RV32IZFINXZDINX-NEXT: li a4, 3 -; RV32IZFINXZDINX-NEXT: li a6, 4 ; RV32IZFINXZDINX-NEXT: addi a1, a1, 327 ; RV32IZFINXZDINX-NEXT: addi a3, a3, -1311 ; RV32IZFINXZDINX-NEXT: addi a5, a5, 327 @@ -221,6 +220,7 
@@ define double @caller_double_stack() nounwind { ; RV32IZFINXZDINX-NEXT: sw a1, 4(sp) ; RV32IZFINXZDINX-NEXT: sw a3, 8(sp) ; RV32IZFINXZDINX-NEXT: sw a5, 12(sp) +; RV32IZFINXZDINX-NEXT: li a6, 4 ; RV32IZFINXZDINX-NEXT: li a1, 0 ; RV32IZFINXZDINX-NEXT: li a3, 0 ; RV32IZFINXZDINX-NEXT: li a5, 0 diff --git a/llvm/test/CodeGen/RISCV/double-convert.ll b/llvm/test/CodeGen/RISCV/double-convert.ll index c39085a80ddc1..052cfd6adff06 100644 --- a/llvm/test/CodeGen/RISCV/double-convert.ll +++ b/llvm/test/CodeGen/RISCV/double-convert.ll @@ -687,9 +687,9 @@ define i64 @fcvt_l_d_sat(double %a) nounwind { ; RV32IFD-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill +; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: lui a0, %hi(.LCPI12_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI12_0)(a0) -; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi ; RV32IFD-NEXT: lui a3, 524288 @@ -1624,13 +1624,13 @@ define signext i16 @fcvt_w_s_i16(double %a) nounwind { define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV32IFD-LABEL: fcvt_w_s_sat_i16: ; RV32IFD: # %bb.0: # %start -; RV32IFD-NEXT: lui a0, %hi(.LCPI26_0) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI26_0)(a0) -; RV32IFD-NEXT: lui a0, %hi(.LCPI26_1) -; RV32IFD-NEXT: fld fa4, %lo(.LCPI26_1)(a0) ; RV32IFD-NEXT: feq.d a0, fa0, fa0 -; RV32IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV32IFD-NEXT: lui a1, %hi(.LCPI26_0) +; RV32IFD-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV32IFD-NEXT: lui a1, %hi(.LCPI26_1) ; RV32IFD-NEXT: neg a0, a0 +; RV32IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV32IFD-NEXT: fld fa4, %lo(.LCPI26_1)(a1) ; RV32IFD-NEXT: fmin.d fa5, fa5, fa4 ; RV32IFD-NEXT: fcvt.w.d a1, fa5, rtz ; RV32IFD-NEXT: and a0, a0, a1 @@ -1638,13 +1638,13 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; ; RV64IFD-LABEL: fcvt_w_s_sat_i16: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: lui a0, %hi(.LCPI26_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI26_0)(a0) -; RV64IFD-NEXT: lui a0, %hi(.LCPI26_1) -; RV64IFD-NEXT: fld fa4, %lo(.LCPI26_1)(a0) ; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV64IFD-NEXT: lui a1, %hi(.LCPI26_0) +; RV64IFD-NEXT: fld fa5, %lo(.LCPI26_0)(a1) +; RV64IFD-NEXT: lui a1, %hi(.LCPI26_1) ; RV64IFD-NEXT: neg a0, a0 +; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV64IFD-NEXT: fld fa4, %lo(.LCPI26_1)(a1) ; RV64IFD-NEXT: fmin.d fa5, fa5, fa4 ; RV64IFD-NEXT: fcvt.l.d a1, fa5, rtz ; RV64IFD-NEXT: and a0, a0, a1 @@ -1653,31 +1653,31 @@ define signext i16 @fcvt_w_s_sat_i16(double %a) nounwind { ; RV32IZFINXZDINX-LABEL: fcvt_w_s_sat_i16: ; RV32IZFINXZDINX: # %bb.0: # %start ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI26_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI26_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI26_0)(a2) -; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI26_1) -; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI26_1+4)(a4) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI26_1)(a4) -; RV32IZFINXZDINX-NEXT: fmax.d a2, a0, a2 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI26_1) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI26_0)(a2) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI26_0+4)(a2) +; RV32IZFINXZDINX-NEXT: fmax.d a4, a0, a4 ; RV32IZFINXZDINX-NEXT: feq.d a0, a0, a0 ; RV32IZFINXZDINX-NEXT: neg a0, a0 -; RV32IZFINXZDINX-NEXT: fmin.d a2, a2, a4 +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI26_1)(a3) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI26_1+4)(a3) +; RV32IZFINXZDINX-NEXT: fmin.d a2, a4, a2 ; RV32IZFINXZDINX-NEXT: fcvt.w.d a1, a2, rtz ; 
RV32IZFINXZDINX-NEXT: and a0, a0, a1 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcvt_w_s_sat_i16: ; RV64IZFINXZDINX: # %bb.0: # %start -; RV64IZFINXZDINX-NEXT: li a1, -505 +; RV64IZFINXZDINX-NEXT: feq.d a1, a0, a0 +; RV64IZFINXZDINX-NEXT: li a2, -505 +; RV64IZFINXZDINX-NEXT: slli a2, a2, 53 +; RV64IZFINXZDINX-NEXT: fmax.d a0, a0, a2 ; RV64IZFINXZDINX-NEXT: lui a2, %hi(.LCPI26_0) -; RV64IZFINXZDINX-NEXT: slli a1, a1, 53 +; RV64IZFINXZDINX-NEXT: neg a1, a1 ; RV64IZFINXZDINX-NEXT: ld a2, %lo(.LCPI26_0)(a2) -; RV64IZFINXZDINX-NEXT: fmax.d a1, a0, a1 -; RV64IZFINXZDINX-NEXT: feq.d a0, a0, a0 -; RV64IZFINXZDINX-NEXT: neg a0, a0 -; RV64IZFINXZDINX-NEXT: fmin.d a1, a1, a2 -; RV64IZFINXZDINX-NEXT: fcvt.l.d a1, a1, rtz -; RV64IZFINXZDINX-NEXT: and a0, a0, a1 +; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a2 +; RV64IZFINXZDINX-NEXT: fcvt.l.d a0, a0, rtz +; RV64IZFINXZDINX-NEXT: and a0, a1, a0 ; RV64IZFINXZDINX-NEXT: ret ; ; RV32I-LABEL: fcvt_w_s_sat_i16: @@ -1829,40 +1829,40 @@ define zeroext i16 @fcvt_wu_s_i16(double %a) nounwind { define zeroext i16 @fcvt_wu_s_sat_i16(double %a) nounwind { ; RV32IFD-LABEL: fcvt_wu_s_sat_i16: ; RV32IFD: # %bb.0: # %start +; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: lui a0, %hi(.LCPI28_0) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI28_0)(a0) -; RV32IFD-NEXT: fcvt.d.w fa4, zero -; RV32IFD-NEXT: fmax.d fa4, fa0, fa4 -; RV32IFD-NEXT: fmin.d fa5, fa4, fa5 +; RV32IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV32IFD-NEXT: fld fa4, %lo(.LCPI28_0)(a0) +; RV32IFD-NEXT: fmin.d fa5, fa5, fa4 ; RV32IFD-NEXT: fcvt.wu.d a0, fa5, rtz ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fcvt_wu_s_sat_i16: ; RV64IFD: # %bb.0: # %start +; RV64IFD-NEXT: fmv.d.x fa5, zero ; RV64IFD-NEXT: lui a0, %hi(.LCPI28_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI28_0)(a0) -; RV64IFD-NEXT: fmv.d.x fa4, zero -; RV64IFD-NEXT: fmax.d fa4, fa0, fa4 -; RV64IFD-NEXT: fmin.d fa5, fa4, fa5 +; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV64IFD-NEXT: fld fa4, %lo(.LCPI28_0)(a0) +; RV64IFD-NEXT: fmin.d fa5, fa5, fa4 ; RV64IFD-NEXT: fcvt.lu.d a0, fa5, rtz ; RV64IFD-NEXT: ret ; ; RV32IZFINXZDINX-LABEL: fcvt_wu_s_sat_i16: ; RV32IZFINXZDINX: # %bb.0: # %start +; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero +; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a2 ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI28_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI28_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI28_0)(a2) -; RV32IZFINXZDINX-NEXT: fcvt.d.w a4, zero -; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a4 ; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a2 ; RV32IZFINXZDINX-NEXT: fcvt.wu.d a0, a0, rtz ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcvt_wu_s_sat_i16: ; RV64IZFINXZDINX: # %bb.0: # %start +; RV64IZFINXZDINX-NEXT: fmax.d a0, a0, zero ; RV64IZFINXZDINX-NEXT: lui a1, %hi(.LCPI28_0) ; RV64IZFINXZDINX-NEXT: ld a1, %lo(.LCPI28_0)(a1) -; RV64IZFINXZDINX-NEXT: fmax.d a0, a0, zero ; RV64IZFINXZDINX-NEXT: fmin.d a0, a0, a1 ; RV64IZFINXZDINX-NEXT: fcvt.lu.d a0, a0, rtz ; RV64IZFINXZDINX-NEXT: ret @@ -1999,13 +1999,13 @@ define signext i8 @fcvt_w_s_i8(double %a) nounwind { define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; RV32IFD-LABEL: fcvt_w_s_sat_i8: ; RV32IFD: # %bb.0: # %start -; RV32IFD-NEXT: lui a0, %hi(.LCPI30_0) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI30_0)(a0) -; RV32IFD-NEXT: lui a0, %hi(.LCPI30_1) -; RV32IFD-NEXT: fld fa4, %lo(.LCPI30_1)(a0) ; RV32IFD-NEXT: feq.d a0, fa0, fa0 -; RV32IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV32IFD-NEXT: lui a1, %hi(.LCPI30_0) +; RV32IFD-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; RV32IFD-NEXT: lui a1, %hi(.LCPI30_1) ; RV32IFD-NEXT: neg a0, a0 
+; RV32IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV32IFD-NEXT: fld fa4, %lo(.LCPI30_1)(a1) ; RV32IFD-NEXT: fmin.d fa5, fa5, fa4 ; RV32IFD-NEXT: fcvt.w.d a1, fa5, rtz ; RV32IFD-NEXT: and a0, a0, a1 @@ -2013,13 +2013,13 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; ; RV64IFD-LABEL: fcvt_w_s_sat_i8: ; RV64IFD: # %bb.0: # %start -; RV64IFD-NEXT: lui a0, %hi(.LCPI30_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI30_0)(a0) -; RV64IFD-NEXT: lui a0, %hi(.LCPI30_1) -; RV64IFD-NEXT: fld fa4, %lo(.LCPI30_1)(a0) ; RV64IFD-NEXT: feq.d a0, fa0, fa0 -; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV64IFD-NEXT: lui a1, %hi(.LCPI30_0) +; RV64IFD-NEXT: fld fa5, %lo(.LCPI30_0)(a1) +; RV64IFD-NEXT: lui a1, %hi(.LCPI30_1) ; RV64IFD-NEXT: neg a0, a0 +; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV64IFD-NEXT: fld fa4, %lo(.LCPI30_1)(a1) ; RV64IFD-NEXT: fmin.d fa5, fa5, fa4 ; RV64IFD-NEXT: fcvt.l.d a1, fa5, rtz ; RV64IFD-NEXT: and a0, a0, a1 @@ -2028,15 +2028,15 @@ define signext i8 @fcvt_w_s_sat_i8(double %a) nounwind { ; RV32IZFINXZDINX-LABEL: fcvt_w_s_sat_i8: ; RV32IZFINXZDINX: # %bb.0: # %start ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI30_0) -; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI30_0+4)(a2) -; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI30_0)(a2) -; RV32IZFINXZDINX-NEXT: lui a4, %hi(.LCPI30_1) -; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI30_1+4)(a4) -; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI30_1)(a4) -; RV32IZFINXZDINX-NEXT: fmax.d a2, a0, a2 +; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI30_1) +; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI30_0)(a2) +; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI30_0+4)(a2) +; RV32IZFINXZDINX-NEXT: fmax.d a4, a0, a4 ; RV32IZFINXZDINX-NEXT: feq.d a0, a0, a0 ; RV32IZFINXZDINX-NEXT: neg a0, a0 -; RV32IZFINXZDINX-NEXT: fmin.d a2, a2, a4 +; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI30_1)(a3) +; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI30_1+4)(a3) +; RV32IZFINXZDINX-NEXT: fmin.d a2, a4, a2 ; RV32IZFINXZDINX-NEXT: fcvt.w.d a1, a2, rtz ; RV32IZFINXZDINX-NEXT: and a0, a0, a1 ; RV32IZFINXZDINX-NEXT: ret @@ -2203,31 +2203,31 @@ define zeroext i8 @fcvt_wu_s_sat_i8(double %a) nounwind { ; ; RV32IFD-LABEL: fcvt_wu_s_sat_i8: ; RV32IFD: # %bb.0: # %start +; RV32IFD-NEXT: fcvt.d.w fa5, zero ; RV32IFD-NEXT: lui a0, %hi(.LCPI32_0) -; RV32IFD-NEXT: fld fa5, %lo(.LCPI32_0)(a0) -; RV32IFD-NEXT: fcvt.d.w fa4, zero -; RV32IFD-NEXT: fmax.d fa4, fa0, fa4 -; RV32IFD-NEXT: fmin.d fa5, fa4, fa5 +; RV32IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV32IFD-NEXT: fld fa4, %lo(.LCPI32_0)(a0) +; RV32IFD-NEXT: fmin.d fa5, fa5, fa4 ; RV32IFD-NEXT: fcvt.wu.d a0, fa5, rtz ; RV32IFD-NEXT: ret ; ; RV64IFD-LABEL: fcvt_wu_s_sat_i8: ; RV64IFD: # %bb.0: # %start +; RV64IFD-NEXT: fmv.d.x fa5, zero ; RV64IFD-NEXT: lui a0, %hi(.LCPI32_0) -; RV64IFD-NEXT: fld fa5, %lo(.LCPI32_0)(a0) -; RV64IFD-NEXT: fmv.d.x fa4, zero -; RV64IFD-NEXT: fmax.d fa4, fa0, fa4 -; RV64IFD-NEXT: fmin.d fa5, fa4, fa5 +; RV64IFD-NEXT: fmax.d fa5, fa0, fa5 +; RV64IFD-NEXT: fld fa4, %lo(.LCPI32_0)(a0) +; RV64IFD-NEXT: fmin.d fa5, fa5, fa4 ; RV64IFD-NEXT: fcvt.lu.d a0, fa5, rtz ; RV64IFD-NEXT: ret ; ; RV32IZFINXZDINX-LABEL: fcvt_wu_s_sat_i8: ; RV32IZFINXZDINX: # %bb.0: # %start +; RV32IZFINXZDINX-NEXT: fcvt.d.w a2, zero +; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a2 ; RV32IZFINXZDINX-NEXT: lui a2, %hi(.LCPI32_0) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI32_0+4)(a2) ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI32_0)(a2) -; RV32IZFINXZDINX-NEXT: fcvt.d.w a4, zero -; RV32IZFINXZDINX-NEXT: fmax.d a0, a0, a4 ; RV32IZFINXZDINX-NEXT: fmin.d a0, a0, a2 ; RV32IZFINXZDINX-NEXT: fcvt.wu.d a0, a0, rtz ; RV32IZFINXZDINX-NEXT: ret 
diff --git a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll index 949668f640dbd..30f995207851f 100644 --- a/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll +++ b/llvm/test/CodeGen/RISCV/double-fcmp-strict.ll @@ -275,8 +275,8 @@ define i32 @fcmp_one(double %a, double %b) nounwind strictfp { ; CHECKIFD-NEXT: frflags a0 ; CHECKIFD-NEXT: flt.d a2, fa1, fa0 ; CHECKIFD-NEXT: fsflags a0 -; CHECKIFD-NEXT: or a0, a2, a1 ; CHECKIFD-NEXT: feq.d zero, fa1, fa0 +; CHECKIFD-NEXT: or a0, a2, a1 ; CHECKIFD-NEXT: ret ; ; RV32IZFINXZDINX-LABEL: fcmp_one: @@ -288,9 +288,8 @@ define i32 @fcmp_one(double %a, double %b) nounwind strictfp { ; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: flt.d a6, a2, a0 ; RV32IZFINXZDINX-NEXT: fsflags a4 -; RV32IZFINXZDINX-NEXT: or a4, a6, a5 ; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0 -; RV32IZFINXZDINX-NEXT: mv a0, a4 +; RV32IZFINXZDINX-NEXT: or a0, a6, a5 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcmp_one: @@ -302,9 +301,8 @@ define i32 @fcmp_one(double %a, double %b) nounwind strictfp { ; RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: flt.d a4, a1, a0 ; RV64IZFINXZDINX-NEXT: fsflags a2 -; RV64IZFINXZDINX-NEXT: or a2, a4, a3 ; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0 -; RV64IZFINXZDINX-NEXT: mv a0, a2 +; RV64IZFINXZDINX-NEXT: or a0, a4, a3 ; RV64IZFINXZDINX-NEXT: ret ; ; RV32I-LABEL: fcmp_one: @@ -423,9 +421,9 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp { ; CHECKIFD-NEXT: frflags a0 ; CHECKIFD-NEXT: flt.d a2, fa1, fa0 ; CHECKIFD-NEXT: fsflags a0 +; CHECKIFD-NEXT: feq.d zero, fa1, fa0 ; CHECKIFD-NEXT: or a1, a2, a1 ; CHECKIFD-NEXT: xori a0, a1, 1 -; CHECKIFD-NEXT: feq.d zero, fa1, fa0 ; CHECKIFD-NEXT: ret ; ; RV32IZFINXZDINX-LABEL: fcmp_ueq: @@ -437,10 +435,9 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp { ; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: flt.d a6, a2, a0 ; RV32IZFINXZDINX-NEXT: fsflags a4 -; RV32IZFINXZDINX-NEXT: or a4, a6, a5 -; RV32IZFINXZDINX-NEXT: xori a4, a4, 1 ; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0 -; RV32IZFINXZDINX-NEXT: mv a0, a4 +; RV32IZFINXZDINX-NEXT: or a0, a6, a5 +; RV32IZFINXZDINX-NEXT: xori a0, a0, 1 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcmp_ueq: @@ -452,10 +449,9 @@ define i32 @fcmp_ueq(double %a, double %b) nounwind strictfp { ; RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: flt.d a4, a1, a0 ; RV64IZFINXZDINX-NEXT: fsflags a2 -; RV64IZFINXZDINX-NEXT: or a3, a4, a3 -; RV64IZFINXZDINX-NEXT: xori a2, a3, 1 ; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0 -; RV64IZFINXZDINX-NEXT: mv a0, a2 +; RV64IZFINXZDINX-NEXT: or a3, a4, a3 +; RV64IZFINXZDINX-NEXT: xori a0, a3, 1 ; RV64IZFINXZDINX-NEXT: ret ; ; RV32I-LABEL: fcmp_ueq: @@ -522,8 +518,8 @@ define i32 @fcmp_ugt(double %a, double %b) nounwind strictfp { ; CHECKIFD-NEXT: frflags a0 ; CHECKIFD-NEXT: fle.d a1, fa0, fa1 ; CHECKIFD-NEXT: fsflags a0 -; CHECKIFD-NEXT: xori a0, a1, 1 ; CHECKIFD-NEXT: feq.d zero, fa0, fa1 +; CHECKIFD-NEXT: xori a0, a1, 1 ; CHECKIFD-NEXT: ret ; ; RV32IZFINXZDINX-LABEL: fcmp_ugt: @@ -531,9 +527,8 @@ define i32 @fcmp_ugt(double %a, double %b) nounwind strictfp { ; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: fle.d a5, a0, a2 ; RV32IZFINXZDINX-NEXT: fsflags a4 -; RV32IZFINXZDINX-NEXT: xori a4, a5, 1 ; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2 -; RV32IZFINXZDINX-NEXT: mv a0, a4 +; RV32IZFINXZDINX-NEXT: xori a0, a5, 1 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcmp_ugt: @@ -541,9 +536,8 @@ define i32 
@fcmp_ugt(double %a, double %b) nounwind strictfp { ; RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: fle.d a3, a0, a1 ; RV64IZFINXZDINX-NEXT: fsflags a2 -; RV64IZFINXZDINX-NEXT: xori a2, a3, 1 ; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1 -; RV64IZFINXZDINX-NEXT: mv a0, a2 +; RV64IZFINXZDINX-NEXT: xori a0, a3, 1 ; RV64IZFINXZDINX-NEXT: ret ; ; RV32I-LABEL: fcmp_ugt: @@ -576,8 +570,8 @@ define i32 @fcmp_uge(double %a, double %b) nounwind strictfp { ; CHECKIFD-NEXT: frflags a0 ; CHECKIFD-NEXT: flt.d a1, fa0, fa1 ; CHECKIFD-NEXT: fsflags a0 -; CHECKIFD-NEXT: xori a0, a1, 1 ; CHECKIFD-NEXT: feq.d zero, fa0, fa1 +; CHECKIFD-NEXT: xori a0, a1, 1 ; CHECKIFD-NEXT: ret ; ; RV32IZFINXZDINX-LABEL: fcmp_uge: @@ -585,9 +579,8 @@ define i32 @fcmp_uge(double %a, double %b) nounwind strictfp { ; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: flt.d a5, a0, a2 ; RV32IZFINXZDINX-NEXT: fsflags a4 -; RV32IZFINXZDINX-NEXT: xori a4, a5, 1 ; RV32IZFINXZDINX-NEXT: feq.d zero, a0, a2 -; RV32IZFINXZDINX-NEXT: mv a0, a4 +; RV32IZFINXZDINX-NEXT: xori a0, a5, 1 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcmp_uge: @@ -595,9 +588,8 @@ define i32 @fcmp_uge(double %a, double %b) nounwind strictfp { ; RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: flt.d a3, a0, a1 ; RV64IZFINXZDINX-NEXT: fsflags a2 -; RV64IZFINXZDINX-NEXT: xori a2, a3, 1 ; RV64IZFINXZDINX-NEXT: feq.d zero, a0, a1 -; RV64IZFINXZDINX-NEXT: mv a0, a2 +; RV64IZFINXZDINX-NEXT: xori a0, a3, 1 ; RV64IZFINXZDINX-NEXT: ret ; ; RV32I-LABEL: fcmp_uge: @@ -632,8 +624,8 @@ define i32 @fcmp_ult(double %a, double %b) nounwind strictfp { ; CHECKIFD-NEXT: frflags a0 ; CHECKIFD-NEXT: fle.d a1, fa1, fa0 ; CHECKIFD-NEXT: fsflags a0 -; CHECKIFD-NEXT: xori a0, a1, 1 ; CHECKIFD-NEXT: feq.d zero, fa1, fa0 +; CHECKIFD-NEXT: xori a0, a1, 1 ; CHECKIFD-NEXT: ret ; ; RV32IZFINXZDINX-LABEL: fcmp_ult: @@ -641,9 +633,8 @@ define i32 @fcmp_ult(double %a, double %b) nounwind strictfp { ; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: fle.d a5, a2, a0 ; RV32IZFINXZDINX-NEXT: fsflags a4 -; RV32IZFINXZDINX-NEXT: xori a4, a5, 1 ; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0 -; RV32IZFINXZDINX-NEXT: mv a0, a4 +; RV32IZFINXZDINX-NEXT: xori a0, a5, 1 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcmp_ult: @@ -651,9 +642,8 @@ define i32 @fcmp_ult(double %a, double %b) nounwind strictfp { ; RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: fle.d a3, a1, a0 ; RV64IZFINXZDINX-NEXT: fsflags a2 -; RV64IZFINXZDINX-NEXT: xori a2, a3, 1 ; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0 -; RV64IZFINXZDINX-NEXT: mv a0, a2 +; RV64IZFINXZDINX-NEXT: xori a0, a3, 1 ; RV64IZFINXZDINX-NEXT: ret ; ; RV32I-LABEL: fcmp_ult: @@ -686,8 +676,8 @@ define i32 @fcmp_ule(double %a, double %b) nounwind strictfp { ; CHECKIFD-NEXT: frflags a0 ; CHECKIFD-NEXT: flt.d a1, fa1, fa0 ; CHECKIFD-NEXT: fsflags a0 -; CHECKIFD-NEXT: xori a0, a1, 1 ; CHECKIFD-NEXT: feq.d zero, fa1, fa0 +; CHECKIFD-NEXT: xori a0, a1, 1 ; CHECKIFD-NEXT: ret ; ; RV32IZFINXZDINX-LABEL: fcmp_ule: @@ -695,9 +685,8 @@ define i32 @fcmp_ule(double %a, double %b) nounwind strictfp { ; RV32IZFINXZDINX-NEXT: frflags a4 ; RV32IZFINXZDINX-NEXT: flt.d a5, a2, a0 ; RV32IZFINXZDINX-NEXT: fsflags a4 -; RV32IZFINXZDINX-NEXT: xori a4, a5, 1 ; RV32IZFINXZDINX-NEXT: feq.d zero, a2, a0 -; RV32IZFINXZDINX-NEXT: mv a0, a4 +; RV32IZFINXZDINX-NEXT: xori a0, a5, 1 ; RV32IZFINXZDINX-NEXT: ret ; ; RV64IZFINXZDINX-LABEL: fcmp_ule: @@ -705,9 +694,8 @@ define i32 @fcmp_ule(double %a, double %b) nounwind strictfp { ; 
RV64IZFINXZDINX-NEXT: frflags a2 ; RV64IZFINXZDINX-NEXT: flt.d a3, a1, a0 ; RV64IZFINXZDINX-NEXT: fsflags a2 -; RV64IZFINXZDINX-NEXT: xori a2, a3, 1 ; RV64IZFINXZDINX-NEXT: feq.d zero, a1, a0 -; RV64IZFINXZDINX-NEXT: mv a0, a2 +; RV64IZFINXZDINX-NEXT: xori a0, a3, 1 ; RV64IZFINXZDINX-NEXT: ret ; ; RV32I-LABEL: fcmp_ule: diff --git a/llvm/test/CodeGen/RISCV/double-imm.ll b/llvm/test/CodeGen/RISCV/double-imm.ll index 155827ad069cc..97fc1bde6155f 100644 --- a/llvm/test/CodeGen/RISCV/double-imm.ll +++ b/llvm/test/CodeGen/RISCV/double-imm.ll @@ -158,12 +158,12 @@ define dso_local double @negzero_sel(i16 noundef %a, double noundef %d) nounwind ; ; CHECKRV64ZDINX-LABEL: negzero_sel: ; CHECKRV64ZDINX: # %bb.0: # %entry -; CHECKRV64ZDINX-NEXT: slli a2, a0, 48 -; CHECKRV64ZDINX-NEXT: mv a0, a1 -; CHECKRV64ZDINX-NEXT: beqz a2, .LBB4_2 +; CHECKRV64ZDINX-NEXT: slli a0, a0, 48 +; CHECKRV64ZDINX-NEXT: beqz a0, .LBB4_2 ; CHECKRV64ZDINX-NEXT: # %bb.1: # %entry -; CHECKRV64ZDINX-NEXT: fneg.d a0, zero +; CHECKRV64ZDINX-NEXT: fneg.d a1, zero ; CHECKRV64ZDINX-NEXT: .LBB4_2: # %entry +; CHECKRV64ZDINX-NEXT: mv a0, a1 ; CHECKRV64ZDINX-NEXT: ret entry: %tobool.not = icmp eq i16 %a, 0 diff --git a/llvm/test/CodeGen/RISCV/double-mem.ll b/llvm/test/CodeGen/RISCV/double-mem.ll index dba9489e7511d..134c8cb0689ca 100644 --- a/llvm/test/CodeGen/RISCV/double-mem.ll +++ b/llvm/test/CodeGen/RISCV/double-mem.ll @@ -51,10 +51,10 @@ define dso_local void @fsd(ptr %a, double %b, double %c) nounwind { ; RV32IZFINXZDINX-LABEL: fsd: ; RV32IZFINXZDINX: # %bb.0: ; RV32IZFINXZDINX-NEXT: mv a5, a4 -; RV32IZFINXZDINX-NEXT: mv a7, a2 ; RV32IZFINXZDINX-NEXT: mv a4, a3 -; RV32IZFINXZDINX-NEXT: mv a6, a1 -; RV32IZFINXZDINX-NEXT: fadd.d a2, a6, a4 +; RV32IZFINXZDINX-NEXT: mv a3, a2 +; RV32IZFINXZDINX-NEXT: mv a2, a1 +; RV32IZFINXZDINX-NEXT: fadd.d a2, a2, a4 ; RV32IZFINXZDINX-NEXT: sw a2, 0(a0) ; RV32IZFINXZDINX-NEXT: sw a3, 4(a0) ; RV32IZFINXZDINX-NEXT: sw a2, 64(a0) diff --git a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll index cd87f2d2301d7..8ebeeabec4a09 100644 --- a/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll +++ b/llvm/test/CodeGen/RISCV/double-round-conv-sat.ll @@ -48,9 +48,9 @@ define i64 @test_floor_si64(double %x) nounwind { ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call floor +; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: lui a0, %hi(.LCPI1_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI1_0)(a0) -; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi ; RV32IFD-NEXT: lui a3, 524288 @@ -103,9 +103,9 @@ define i64 @test_floor_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI1_1) ; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI1_0)(a2) ; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI1_0+4)(a2) +; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI1_1)(a3) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI1_1+4)(a3) -; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 ; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 ; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 @@ -185,12 +185,12 @@ define i64 @test_floor_ui64(double %x) nounwind { ; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: call floor ; RV32IFD-NEXT: lui a0, %hi(.LCPI3_0) +; RV32IFD-NEXT: fcvt.d.w fa5, zero +; RV32IFD-NEXT: fle.d a1, fa5, fa0 ; RV32IFD-NEXT: fld fa5, %lo(.LCPI3_0)(a0) -; RV32IFD-NEXT: fcvt.d.w fa4, zero -; RV32IFD-NEXT: fle.d a0, fa4, fa0 
-; RV32IFD-NEXT: flt.d a1, fa5, fa0 -; RV32IFD-NEXT: neg s0, a1 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: flt.d a0, fa5, fa0 +; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: neg s1, a1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: and a1, s1, a1 @@ -292,9 +292,9 @@ define i64 @test_ceil_si64(double %x) nounwind { ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call ceil +; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: lui a0, %hi(.LCPI5_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI5_0)(a0) -; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi ; RV32IFD-NEXT: lui a3, 524288 @@ -347,9 +347,9 @@ define i64 @test_ceil_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI5_1) ; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI5_0)(a2) ; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI5_0+4)(a2) +; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI5_1)(a3) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI5_1+4)(a3) -; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 ; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 ; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 @@ -429,12 +429,12 @@ define i64 @test_ceil_ui64(double %x) nounwind { ; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: call ceil ; RV32IFD-NEXT: lui a0, %hi(.LCPI7_0) +; RV32IFD-NEXT: fcvt.d.w fa5, zero +; RV32IFD-NEXT: fle.d a1, fa5, fa0 ; RV32IFD-NEXT: fld fa5, %lo(.LCPI7_0)(a0) -; RV32IFD-NEXT: fcvt.d.w fa4, zero -; RV32IFD-NEXT: fle.d a0, fa4, fa0 -; RV32IFD-NEXT: flt.d a1, fa5, fa0 -; RV32IFD-NEXT: neg s0, a1 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: flt.d a0, fa5, fa0 +; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: neg s1, a1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: and a1, s1, a1 @@ -536,9 +536,9 @@ define i64 @test_trunc_si64(double %x) nounwind { ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call trunc +; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: lui a0, %hi(.LCPI9_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI9_0)(a0) -; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi ; RV32IFD-NEXT: lui a3, 524288 @@ -591,9 +591,9 @@ define i64 @test_trunc_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI9_1) ; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI9_0)(a2) ; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI9_0+4)(a2) +; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI9_1)(a3) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI9_1+4)(a3) -; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 ; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 ; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 @@ -673,12 +673,12 @@ define i64 @test_trunc_ui64(double %x) nounwind { ; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: call trunc ; RV32IFD-NEXT: lui a0, %hi(.LCPI11_0) +; RV32IFD-NEXT: fcvt.d.w fa5, zero +; RV32IFD-NEXT: fle.d a1, fa5, fa0 ; RV32IFD-NEXT: fld fa5, %lo(.LCPI11_0)(a0) -; RV32IFD-NEXT: fcvt.d.w fa4, zero -; RV32IFD-NEXT: fle.d a0, fa4, fa0 -; RV32IFD-NEXT: flt.d a1, fa5, fa0 -; RV32IFD-NEXT: neg s0, a1 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: flt.d a0, fa5, fa0 +; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: neg s1, a1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: and a1, s1, a1 @@ -780,9 +780,9 @@ define i64 
@test_round_si64(double %x) nounwind { ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call round +; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: lui a0, %hi(.LCPI13_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI13_0)(a0) -; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi ; RV32IFD-NEXT: lui a3, 524288 @@ -835,9 +835,9 @@ define i64 @test_round_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI13_1) ; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI13_0)(a2) ; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI13_0+4)(a2) +; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI13_1)(a3) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI13_1+4)(a3) -; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 ; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 ; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 @@ -917,12 +917,12 @@ define i64 @test_round_ui64(double %x) nounwind { ; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: call round ; RV32IFD-NEXT: lui a0, %hi(.LCPI15_0) +; RV32IFD-NEXT: fcvt.d.w fa5, zero +; RV32IFD-NEXT: fle.d a1, fa5, fa0 ; RV32IFD-NEXT: fld fa5, %lo(.LCPI15_0)(a0) -; RV32IFD-NEXT: fcvt.d.w fa4, zero -; RV32IFD-NEXT: fle.d a0, fa4, fa0 -; RV32IFD-NEXT: flt.d a1, fa5, fa0 -; RV32IFD-NEXT: neg s0, a1 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: flt.d a0, fa5, fa0 +; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: neg s1, a1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: and a1, s1, a1 @@ -1024,9 +1024,9 @@ define i64 @test_roundeven_si64(double %x) nounwind { ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call roundeven +; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: lui a0, %hi(.LCPI17_0) ; RV32IFD-NEXT: fld fa5, %lo(.LCPI17_0)(a0) -; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi ; RV32IFD-NEXT: lui a3, 524288 @@ -1079,9 +1079,9 @@ define i64 @test_roundeven_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI17_1) ; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI17_0)(a2) ; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI17_0+4)(a2) +; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI17_1)(a3) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI17_1+4)(a3) -; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 ; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 ; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 @@ -1161,12 +1161,12 @@ define i64 @test_roundeven_ui64(double %x) nounwind { ; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: call roundeven ; RV32IFD-NEXT: lui a0, %hi(.LCPI19_0) +; RV32IFD-NEXT: fcvt.d.w fa5, zero +; RV32IFD-NEXT: fle.d a1, fa5, fa0 ; RV32IFD-NEXT: fld fa5, %lo(.LCPI19_0)(a0) -; RV32IFD-NEXT: fcvt.d.w fa4, zero -; RV32IFD-NEXT: fle.d a0, fa4, fa0 -; RV32IFD-NEXT: flt.d a1, fa5, fa0 -; RV32IFD-NEXT: neg s0, a1 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: flt.d a0, fa5, fa0 +; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: neg s1, a1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: and a1, s1, a1 @@ -1268,9 +1268,9 @@ define i64 @test_rint_si64(double %x) nounwind { ; RV32IFD-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: fsd fs0, 0(sp) # 8-byte Folded Spill ; RV32IFD-NEXT: call rint +; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: lui a0, %hi(.LCPI21_0) ; RV32IFD-NEXT: fld fa5, 
%lo(.LCPI21_0)(a0) -; RV32IFD-NEXT: fmv.d fs0, fa0 ; RV32IFD-NEXT: fle.d s0, fa5, fa0 ; RV32IFD-NEXT: call __fixdfdi ; RV32IFD-NEXT: lui a3, 524288 @@ -1323,9 +1323,9 @@ define i64 @test_rint_si64(double %x) nounwind { ; RV32IZFINXZDINX-NEXT: lui a3, %hi(.LCPI21_1) ; RV32IZFINXZDINX-NEXT: lw a4, %lo(.LCPI21_0)(a2) ; RV32IZFINXZDINX-NEXT: lw a5, %lo(.LCPI21_0+4)(a2) +; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 ; RV32IZFINXZDINX-NEXT: lw a2, %lo(.LCPI21_1)(a3) ; RV32IZFINXZDINX-NEXT: lw a3, %lo(.LCPI21_1+4)(a3) -; RV32IZFINXZDINX-NEXT: fle.d a6, a4, s0 ; RV32IZFINXZDINX-NEXT: flt.d a3, a2, s0 ; RV32IZFINXZDINX-NEXT: feq.d a2, s0, s0 ; RV32IZFINXZDINX-NEXT: lui a4, 524288 @@ -1405,12 +1405,12 @@ define i64 @test_rint_ui64(double %x) nounwind { ; RV32IFD-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: call rint ; RV32IFD-NEXT: lui a0, %hi(.LCPI23_0) +; RV32IFD-NEXT: fcvt.d.w fa5, zero +; RV32IFD-NEXT: fle.d a1, fa5, fa0 ; RV32IFD-NEXT: fld fa5, %lo(.LCPI23_0)(a0) -; RV32IFD-NEXT: fcvt.d.w fa4, zero -; RV32IFD-NEXT: fle.d a0, fa4, fa0 -; RV32IFD-NEXT: flt.d a1, fa5, fa0 -; RV32IFD-NEXT: neg s0, a1 -; RV32IFD-NEXT: neg s1, a0 +; RV32IFD-NEXT: flt.d a0, fa5, fa0 +; RV32IFD-NEXT: neg s0, a0 +; RV32IFD-NEXT: neg s1, a1 ; RV32IFD-NEXT: call __fixunsdfdi ; RV32IFD-NEXT: and a0, s1, a0 ; RV32IFD-NEXT: and a1, s1, a1 diff --git a/llvm/test/CodeGen/RISCV/double-select-fcmp.ll b/llvm/test/CodeGen/RISCV/double-select-fcmp.ll index e7ff991413013..10c417174e7fd 100644 --- a/llvm/test/CodeGen/RISCV/double-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/double-select-fcmp.ll @@ -545,22 +545,22 @@ define i32 @i32_select_fcmp_oeq(double %a, double %b, i32 %c, i32 %d) nounwind { ; ; CHECKRV32ZDINX-LABEL: i32_select_fcmp_oeq: ; CHECKRV32ZDINX: # %bb.0: -; CHECKRV32ZDINX-NEXT: feq.d a1, a0, a2 -; CHECKRV32ZDINX-NEXT: mv a0, a4 -; CHECKRV32ZDINX-NEXT: bnez a1, .LBB16_2 +; CHECKRV32ZDINX-NEXT: feq.d a0, a0, a2 +; CHECKRV32ZDINX-NEXT: bnez a0, .LBB16_2 ; CHECKRV32ZDINX-NEXT: # %bb.1: -; CHECKRV32ZDINX-NEXT: mv a0, a5 +; CHECKRV32ZDINX-NEXT: mv a4, a5 ; CHECKRV32ZDINX-NEXT: .LBB16_2: +; CHECKRV32ZDINX-NEXT: mv a0, a4 ; CHECKRV32ZDINX-NEXT: ret ; ; CHECKRV64ZDINX-LABEL: i32_select_fcmp_oeq: ; CHECKRV64ZDINX: # %bb.0: -; CHECKRV64ZDINX-NEXT: feq.d a1, a0, a1 -; CHECKRV64ZDINX-NEXT: mv a0, a2 -; CHECKRV64ZDINX-NEXT: bnez a1, .LBB16_2 +; CHECKRV64ZDINX-NEXT: feq.d a0, a0, a1 +; CHECKRV64ZDINX-NEXT: bnez a0, .LBB16_2 ; CHECKRV64ZDINX-NEXT: # %bb.1: -; CHECKRV64ZDINX-NEXT: mv a0, a3 +; CHECKRV64ZDINX-NEXT: mv a2, a3 ; CHECKRV64ZDINX-NEXT: .LBB16_2: +; CHECKRV64ZDINX-NEXT: mv a0, a2 ; CHECKRV64ZDINX-NEXT: ret %1 = fcmp oeq double %a, %b %2 = select i1 %1, i32 %c, i32 %d @@ -577,9 +577,9 @@ define i32 @select_fcmp_oeq_1_2(double %a, double %b) { ; ; CHECKRV32ZDINX-LABEL: select_fcmp_oeq_1_2: ; CHECKRV32ZDINX: # %bb.0: -; CHECKRV32ZDINX-NEXT: li a4, 2 ; CHECKRV32ZDINX-NEXT: feq.d a0, a0, a2 -; CHECKRV32ZDINX-NEXT: sub a0, a4, a0 +; CHECKRV32ZDINX-NEXT: li a1, 2 +; CHECKRV32ZDINX-NEXT: sub a0, a1, a0 ; CHECKRV32ZDINX-NEXT: ret ; ; CHECKRV64ZDINX-LABEL: select_fcmp_oeq_1_2: diff --git a/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll b/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll index 4ae912a34d337..4478e7b8c1724 100644 --- a/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll +++ b/llvm/test/CodeGen/RISCV/double-stack-spill-restore.ll @@ -39,9 +39,9 @@ define double @func(double %d, i32 %n) nounwind { ; ; RV64IFD-LABEL: func: ; RV64IFD: # %bb.0: # %entry -; RV64IFD-NEXT: sext.w a2, a1 ; RV64IFD-NEXT: 
fmv.d.x fa5, a0 -; RV64IFD-NEXT: beqz a2, .LBB0_2 +; RV64IFD-NEXT: sext.w a0, a1 +; RV64IFD-NEXT: beqz a0, .LBB0_2 ; RV64IFD-NEXT: # %bb.1: # %if.else ; RV64IFD-NEXT: addi sp, sp, -16 ; RV64IFD-NEXT: sd ra, 8(sp) # 8-byte Folded Spill diff --git a/llvm/test/CodeGen/RISCV/fastcc-bf16.ll b/llvm/test/CodeGen/RISCV/fastcc-bf16.ll index 17356116081ff..91577b96de6ba 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-bf16.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-bf16.ll @@ -23,6 +23,9 @@ define bfloat @caller(<32 x bfloat> %A) nounwind { ; CHECK-NEXT: fmv.h.x fa2, a2 ; CHECK-NEXT: fmv.h.x fa3, a3 ; CHECK-NEXT: fmv.h.x fa4, a4 +; CHECK-NEXT: fmv.h.x fa5, a5 +; CHECK-NEXT: fmv.h.x fa6, a6 +; CHECK-NEXT: fmv.h.x fa7, a7 ; CHECK-NEXT: flh ft0, 32(sp) ; CHECK-NEXT: flh ft1, 36(sp) ; CHECK-NEXT: flh ft2, 40(sp) @@ -47,9 +50,6 @@ define bfloat @caller(<32 x bfloat> %A) nounwind { ; CHECK-NEXT: flh fs9, 116(sp) ; CHECK-NEXT: flh fs10, 120(sp) ; CHECK-NEXT: flh fs11, 124(sp) -; CHECK-NEXT: fmv.h.x fa5, a5 -; CHECK-NEXT: fmv.h.x fa6, a6 -; CHECK-NEXT: fmv.h.x fa7, a7 ; CHECK-NEXT: fsh fs8, 16(sp) ; CHECK-NEXT: fsh fs9, 18(sp) ; CHECK-NEXT: fsh fs10, 20(sp) diff --git a/llvm/test/CodeGen/RISCV/fastcc-float.ll b/llvm/test/CodeGen/RISCV/fastcc-float.ll index 237a72d983de4..c1c5fc440d403 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-float.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-float.ll @@ -23,6 +23,9 @@ define float @caller(<32 x float> %A) nounwind { ; CHECK-NEXT: fmv.w.x fa2, a2 ; CHECK-NEXT: fmv.w.x fa3, a3 ; CHECK-NEXT: fmv.w.x fa4, a4 +; CHECK-NEXT: fmv.w.x fa5, a5 +; CHECK-NEXT: fmv.w.x fa6, a6 +; CHECK-NEXT: fmv.w.x fa7, a7 ; CHECK-NEXT: flw ft0, 64(sp) ; CHECK-NEXT: flw ft1, 68(sp) ; CHECK-NEXT: flw ft2, 72(sp) @@ -47,9 +50,6 @@ define float @caller(<32 x float> %A) nounwind { ; CHECK-NEXT: flw fs9, 148(sp) ; CHECK-NEXT: flw fs10, 152(sp) ; CHECK-NEXT: flw fs11, 156(sp) -; CHECK-NEXT: fmv.w.x fa5, a5 -; CHECK-NEXT: fmv.w.x fa6, a6 -; CHECK-NEXT: fmv.w.x fa7, a7 ; CHECK-NEXT: fsw fs8, 32(sp) ; CHECK-NEXT: fsw fs9, 36(sp) ; CHECK-NEXT: fsw fs10, 40(sp) diff --git a/llvm/test/CodeGen/RISCV/fastcc-half.ll b/llvm/test/CodeGen/RISCV/fastcc-half.ll index bf8d4e8dcb98c..b5c3f7ef8d523 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-half.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-half.ll @@ -23,6 +23,9 @@ define half @caller(<32 x half> %A) nounwind { ; CHECK-NEXT: fmv.h.x fa2, a2 ; CHECK-NEXT: fmv.h.x fa3, a3 ; CHECK-NEXT: fmv.h.x fa4, a4 +; CHECK-NEXT: fmv.h.x fa5, a5 +; CHECK-NEXT: fmv.h.x fa6, a6 +; CHECK-NEXT: fmv.h.x fa7, a7 ; CHECK-NEXT: flh ft0, 32(sp) ; CHECK-NEXT: flh ft1, 36(sp) ; CHECK-NEXT: flh ft2, 40(sp) @@ -47,9 +50,6 @@ define half @caller(<32 x half> %A) nounwind { ; CHECK-NEXT: flh fs9, 116(sp) ; CHECK-NEXT: flh fs10, 120(sp) ; CHECK-NEXT: flh fs11, 124(sp) -; CHECK-NEXT: fmv.h.x fa5, a5 -; CHECK-NEXT: fmv.h.x fa6, a6 -; CHECK-NEXT: fmv.h.x fa7, a7 ; CHECK-NEXT: fsh fs8, 16(sp) ; CHECK-NEXT: fsh fs9, 18(sp) ; CHECK-NEXT: fsh fs10, 20(sp) diff --git a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll index 8a91c46bcdaff..beb0df5f292be 100644 --- a/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll +++ b/llvm/test/CodeGen/RISCV/fastcc-without-f-reg.ll @@ -287,6 +287,7 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: mv a7, a3 ; ZHINX32-NEXT: mv a6, a2 ; ZHINX32-NEXT: mv a5, a1 +; ZHINX32-NEXT: mv a4, a0 ; ZHINX32-NEXT: lh t3, 112(sp) ; ZHINX32-NEXT: lh t4, 116(sp) ; ZHINX32-NEXT: lh t5, 120(sp) @@ -307,14 +308,14 @@ define half 
@caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: lh s10, 180(sp) ; ZHINX32-NEXT: lh s11, 184(sp) ; ZHINX32-NEXT: lh ra, 188(sp) -; ZHINX32-NEXT: lh a1, 192(sp) -; ZHINX32-NEXT: lh a2, 196(sp) -; ZHINX32-NEXT: lh a3, 200(sp) -; ZHINX32-NEXT: lh a4, 204(sp) -; ZHINX32-NEXT: sh a1, 32(sp) -; ZHINX32-NEXT: sh a2, 34(sp) -; ZHINX32-NEXT: sh a3, 36(sp) -; ZHINX32-NEXT: sh a4, 38(sp) +; ZHINX32-NEXT: lh a0, 192(sp) +; ZHINX32-NEXT: lh a1, 196(sp) +; ZHINX32-NEXT: lh a2, 200(sp) +; ZHINX32-NEXT: lh a3, 204(sp) +; ZHINX32-NEXT: sh a0, 32(sp) +; ZHINX32-NEXT: sh a1, 34(sp) +; ZHINX32-NEXT: sh a2, 36(sp) +; ZHINX32-NEXT: sh a3, 38(sp) ; ZHINX32-NEXT: sh s9, 24(sp) ; ZHINX32-NEXT: sh s10, 26(sp) ; ZHINX32-NEXT: sh s11, 28(sp) @@ -331,6 +332,7 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX32-NEXT: sh t1, 2(sp) ; ZHINX32-NEXT: sh t2, 4(sp) ; ZHINX32-NEXT: sh s0, 6(sp) +; ZHINX32-NEXT: mv a0, a4 ; ZHINX32-NEXT: mv a1, a5 ; ZHINX32-NEXT: mv a2, a6 ; ZHINX32-NEXT: mv a3, a7 @@ -378,6 +380,7 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX64-NEXT: mv a7, a3 ; ZHINX64-NEXT: mv a6, a2 ; ZHINX64-NEXT: mv a5, a1 +; ZHINX64-NEXT: mv a4, a0 ; ZHINX64-NEXT: lh t3, 160(sp) ; ZHINX64-NEXT: lh t4, 168(sp) ; ZHINX64-NEXT: lh t5, 176(sp) @@ -398,14 +401,14 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX64-NEXT: lh s10, 296(sp) ; ZHINX64-NEXT: lh s11, 304(sp) ; ZHINX64-NEXT: lh ra, 312(sp) -; ZHINX64-NEXT: lh a1, 320(sp) -; ZHINX64-NEXT: lh a2, 328(sp) -; ZHINX64-NEXT: lh a3, 336(sp) -; ZHINX64-NEXT: lh a4, 344(sp) -; ZHINX64-NEXT: sh a1, 32(sp) -; ZHINX64-NEXT: sh a2, 34(sp) -; ZHINX64-NEXT: sh a3, 36(sp) -; ZHINX64-NEXT: sh a4, 38(sp) +; ZHINX64-NEXT: lh a0, 320(sp) +; ZHINX64-NEXT: lh a1, 328(sp) +; ZHINX64-NEXT: lh a2, 336(sp) +; ZHINX64-NEXT: lh a3, 344(sp) +; ZHINX64-NEXT: sh a0, 32(sp) +; ZHINX64-NEXT: sh a1, 34(sp) +; ZHINX64-NEXT: sh a2, 36(sp) +; ZHINX64-NEXT: sh a3, 38(sp) ; ZHINX64-NEXT: sh s9, 24(sp) ; ZHINX64-NEXT: sh s10, 26(sp) ; ZHINX64-NEXT: sh s11, 28(sp) @@ -422,6 +425,7 @@ define half @caller_half_32(<32 x half> %A) nounwind { ; ZHINX64-NEXT: sh t1, 2(sp) ; ZHINX64-NEXT: sh t2, 4(sp) ; ZHINX64-NEXT: sh s0, 6(sp) +; ZHINX64-NEXT: mv a0, a4 ; ZHINX64-NEXT: mv a1, a5 ; ZHINX64-NEXT: mv a2, a6 ; ZHINX64-NEXT: mv a3, a7 @@ -893,6 +897,7 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX32-NEXT: mv a7, a3 ; ZHINX32-NEXT: mv a6, a2 ; ZHINX32-NEXT: mv a5, a1 +; ZHINX32-NEXT: mv a4, a0 ; ZHINX32-NEXT: lw t3, 160(sp) ; ZHINX32-NEXT: lw t4, 164(sp) ; ZHINX32-NEXT: lw t5, 168(sp) @@ -913,14 +918,14 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX32-NEXT: lw s10, 228(sp) ; ZHINX32-NEXT: lw s11, 232(sp) ; ZHINX32-NEXT: lw ra, 236(sp) -; ZHINX32-NEXT: lw a1, 240(sp) -; ZHINX32-NEXT: lw a2, 244(sp) -; ZHINX32-NEXT: lw a3, 248(sp) -; ZHINX32-NEXT: lw a4, 252(sp) -; ZHINX32-NEXT: sw a1, 64(sp) -; ZHINX32-NEXT: sw a2, 68(sp) -; ZHINX32-NEXT: sw a3, 72(sp) -; ZHINX32-NEXT: sw a4, 76(sp) +; ZHINX32-NEXT: lw a0, 240(sp) +; ZHINX32-NEXT: lw a1, 244(sp) +; ZHINX32-NEXT: lw a2, 248(sp) +; ZHINX32-NEXT: lw a3, 252(sp) +; ZHINX32-NEXT: sw a0, 64(sp) +; ZHINX32-NEXT: sw a1, 68(sp) +; ZHINX32-NEXT: sw a2, 72(sp) +; ZHINX32-NEXT: sw a3, 76(sp) ; ZHINX32-NEXT: sw s9, 48(sp) ; ZHINX32-NEXT: sw s10, 52(sp) ; ZHINX32-NEXT: sw s11, 56(sp) @@ -937,6 +942,7 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX32-NEXT: sw t1, 4(sp) ; ZHINX32-NEXT: sw t2, 8(sp) ; ZHINX32-NEXT: sw s0, 12(sp) +; ZHINX32-NEXT: mv a0, a4 ; 
ZHINX32-NEXT: mv a1, a5 ; ZHINX32-NEXT: mv a2, a6 ; ZHINX32-NEXT: mv a3, a7 @@ -984,6 +990,7 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX64-NEXT: mv a7, a3 ; ZHINX64-NEXT: mv a6, a2 ; ZHINX64-NEXT: mv a5, a1 +; ZHINX64-NEXT: mv a4, a0 ; ZHINX64-NEXT: lw t3, 208(sp) ; ZHINX64-NEXT: lw t4, 216(sp) ; ZHINX64-NEXT: lw t5, 224(sp) @@ -1004,14 +1011,14 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX64-NEXT: lw s10, 344(sp) ; ZHINX64-NEXT: lw s11, 352(sp) ; ZHINX64-NEXT: lw ra, 360(sp) -; ZHINX64-NEXT: lw a1, 368(sp) -; ZHINX64-NEXT: lw a2, 376(sp) -; ZHINX64-NEXT: lw a3, 384(sp) -; ZHINX64-NEXT: lw a4, 392(sp) -; ZHINX64-NEXT: sw a1, 64(sp) -; ZHINX64-NEXT: sw a2, 68(sp) -; ZHINX64-NEXT: sw a3, 72(sp) -; ZHINX64-NEXT: sw a4, 76(sp) +; ZHINX64-NEXT: lw a0, 368(sp) +; ZHINX64-NEXT: lw a1, 376(sp) +; ZHINX64-NEXT: lw a2, 384(sp) +; ZHINX64-NEXT: lw a3, 392(sp) +; ZHINX64-NEXT: sw a0, 64(sp) +; ZHINX64-NEXT: sw a1, 68(sp) +; ZHINX64-NEXT: sw a2, 72(sp) +; ZHINX64-NEXT: sw a3, 76(sp) ; ZHINX64-NEXT: sw s9, 48(sp) ; ZHINX64-NEXT: sw s10, 52(sp) ; ZHINX64-NEXT: sw s11, 56(sp) @@ -1028,6 +1035,7 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZHINX64-NEXT: sw t1, 4(sp) ; ZHINX64-NEXT: sw t2, 8(sp) ; ZHINX64-NEXT: sw s0, 12(sp) +; ZHINX64-NEXT: mv a0, a4 ; ZHINX64-NEXT: mv a1, a5 ; ZHINX64-NEXT: mv a2, a6 ; ZHINX64-NEXT: mv a3, a7 @@ -1075,6 +1083,7 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX32-NEXT: mv a7, a3 ; ZFINX32-NEXT: mv a6, a2 ; ZFINX32-NEXT: mv a5, a1 +; ZFINX32-NEXT: mv a4, a0 ; ZFINX32-NEXT: lw t3, 160(sp) ; ZFINX32-NEXT: lw t4, 164(sp) ; ZFINX32-NEXT: lw t5, 168(sp) @@ -1095,14 +1104,14 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX32-NEXT: lw s10, 228(sp) ; ZFINX32-NEXT: lw s11, 232(sp) ; ZFINX32-NEXT: lw ra, 236(sp) -; ZFINX32-NEXT: lw a1, 240(sp) -; ZFINX32-NEXT: lw a2, 244(sp) -; ZFINX32-NEXT: lw a3, 248(sp) -; ZFINX32-NEXT: lw a4, 252(sp) -; ZFINX32-NEXT: sw a1, 64(sp) -; ZFINX32-NEXT: sw a2, 68(sp) -; ZFINX32-NEXT: sw a3, 72(sp) -; ZFINX32-NEXT: sw a4, 76(sp) +; ZFINX32-NEXT: lw a0, 240(sp) +; ZFINX32-NEXT: lw a1, 244(sp) +; ZFINX32-NEXT: lw a2, 248(sp) +; ZFINX32-NEXT: lw a3, 252(sp) +; ZFINX32-NEXT: sw a0, 64(sp) +; ZFINX32-NEXT: sw a1, 68(sp) +; ZFINX32-NEXT: sw a2, 72(sp) +; ZFINX32-NEXT: sw a3, 76(sp) ; ZFINX32-NEXT: sw s9, 48(sp) ; ZFINX32-NEXT: sw s10, 52(sp) ; ZFINX32-NEXT: sw s11, 56(sp) @@ -1119,6 +1128,7 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX32-NEXT: sw t1, 4(sp) ; ZFINX32-NEXT: sw t2, 8(sp) ; ZFINX32-NEXT: sw s0, 12(sp) +; ZFINX32-NEXT: mv a0, a4 ; ZFINX32-NEXT: mv a1, a5 ; ZFINX32-NEXT: mv a2, a6 ; ZFINX32-NEXT: mv a3, a7 @@ -1166,6 +1176,7 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX64-NEXT: mv a7, a3 ; ZFINX64-NEXT: mv a6, a2 ; ZFINX64-NEXT: mv a5, a1 +; ZFINX64-NEXT: mv a4, a0 ; ZFINX64-NEXT: lw t3, 208(sp) ; ZFINX64-NEXT: lw t4, 216(sp) ; ZFINX64-NEXT: lw t5, 224(sp) @@ -1186,14 +1197,14 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX64-NEXT: lw s10, 344(sp) ; ZFINX64-NEXT: lw s11, 352(sp) ; ZFINX64-NEXT: lw ra, 360(sp) -; ZFINX64-NEXT: lw a1, 368(sp) -; ZFINX64-NEXT: lw a2, 376(sp) -; ZFINX64-NEXT: lw a3, 384(sp) -; ZFINX64-NEXT: lw a4, 392(sp) -; ZFINX64-NEXT: sw a1, 64(sp) -; ZFINX64-NEXT: sw a2, 68(sp) -; ZFINX64-NEXT: sw a3, 72(sp) -; ZFINX64-NEXT: sw a4, 76(sp) +; ZFINX64-NEXT: lw a0, 368(sp) +; ZFINX64-NEXT: lw a1, 376(sp) +; ZFINX64-NEXT: lw a2, 384(sp) +; ZFINX64-NEXT: lw a3, 
392(sp) +; ZFINX64-NEXT: sw a0, 64(sp) +; ZFINX64-NEXT: sw a1, 68(sp) +; ZFINX64-NEXT: sw a2, 72(sp) +; ZFINX64-NEXT: sw a3, 76(sp) ; ZFINX64-NEXT: sw s9, 48(sp) ; ZFINX64-NEXT: sw s10, 52(sp) ; ZFINX64-NEXT: sw s11, 56(sp) @@ -1210,6 +1221,7 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZFINX64-NEXT: sw t1, 4(sp) ; ZFINX64-NEXT: sw t2, 8(sp) ; ZFINX64-NEXT: sw s0, 12(sp) +; ZFINX64-NEXT: mv a0, a4 ; ZFINX64-NEXT: mv a1, a5 ; ZFINX64-NEXT: mv a2, a6 ; ZFINX64-NEXT: mv a3, a7 @@ -1257,6 +1269,7 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX32-NEXT: mv a7, a3 ; ZDINX32-NEXT: mv a6, a2 ; ZDINX32-NEXT: mv a5, a1 +; ZDINX32-NEXT: mv a4, a0 ; ZDINX32-NEXT: lw t3, 160(sp) ; ZDINX32-NEXT: lw t4, 164(sp) ; ZDINX32-NEXT: lw t5, 168(sp) @@ -1277,14 +1290,14 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX32-NEXT: lw s10, 228(sp) ; ZDINX32-NEXT: lw s11, 232(sp) ; ZDINX32-NEXT: lw ra, 236(sp) -; ZDINX32-NEXT: lw a1, 240(sp) -; ZDINX32-NEXT: lw a2, 244(sp) -; ZDINX32-NEXT: lw a3, 248(sp) -; ZDINX32-NEXT: lw a4, 252(sp) -; ZDINX32-NEXT: sw a1, 64(sp) -; ZDINX32-NEXT: sw a2, 68(sp) -; ZDINX32-NEXT: sw a3, 72(sp) -; ZDINX32-NEXT: sw a4, 76(sp) +; ZDINX32-NEXT: lw a0, 240(sp) +; ZDINX32-NEXT: lw a1, 244(sp) +; ZDINX32-NEXT: lw a2, 248(sp) +; ZDINX32-NEXT: lw a3, 252(sp) +; ZDINX32-NEXT: sw a0, 64(sp) +; ZDINX32-NEXT: sw a1, 68(sp) +; ZDINX32-NEXT: sw a2, 72(sp) +; ZDINX32-NEXT: sw a3, 76(sp) ; ZDINX32-NEXT: sw s9, 48(sp) ; ZDINX32-NEXT: sw s10, 52(sp) ; ZDINX32-NEXT: sw s11, 56(sp) @@ -1301,6 +1314,7 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX32-NEXT: sw t1, 4(sp) ; ZDINX32-NEXT: sw t2, 8(sp) ; ZDINX32-NEXT: sw s0, 12(sp) +; ZDINX32-NEXT: mv a0, a4 ; ZDINX32-NEXT: mv a1, a5 ; ZDINX32-NEXT: mv a2, a6 ; ZDINX32-NEXT: mv a3, a7 @@ -1348,6 +1362,7 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX64-NEXT: mv a7, a3 ; ZDINX64-NEXT: mv a6, a2 ; ZDINX64-NEXT: mv a5, a1 +; ZDINX64-NEXT: mv a4, a0 ; ZDINX64-NEXT: lw t3, 208(sp) ; ZDINX64-NEXT: lw t4, 216(sp) ; ZDINX64-NEXT: lw t5, 224(sp) @@ -1368,14 +1383,14 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX64-NEXT: lw s10, 344(sp) ; ZDINX64-NEXT: lw s11, 352(sp) ; ZDINX64-NEXT: lw ra, 360(sp) -; ZDINX64-NEXT: lw a1, 368(sp) -; ZDINX64-NEXT: lw a2, 376(sp) -; ZDINX64-NEXT: lw a3, 384(sp) -; ZDINX64-NEXT: lw a4, 392(sp) -; ZDINX64-NEXT: sw a1, 64(sp) -; ZDINX64-NEXT: sw a2, 68(sp) -; ZDINX64-NEXT: sw a3, 72(sp) -; ZDINX64-NEXT: sw a4, 76(sp) +; ZDINX64-NEXT: lw a0, 368(sp) +; ZDINX64-NEXT: lw a1, 376(sp) +; ZDINX64-NEXT: lw a2, 384(sp) +; ZDINX64-NEXT: lw a3, 392(sp) +; ZDINX64-NEXT: sw a0, 64(sp) +; ZDINX64-NEXT: sw a1, 68(sp) +; ZDINX64-NEXT: sw a2, 72(sp) +; ZDINX64-NEXT: sw a3, 76(sp) ; ZDINX64-NEXT: sw s9, 48(sp) ; ZDINX64-NEXT: sw s10, 52(sp) ; ZDINX64-NEXT: sw s11, 56(sp) @@ -1392,6 +1407,7 @@ define float @caller_float_32(<32 x float> %A) nounwind { ; ZDINX64-NEXT: sw t1, 4(sp) ; ZDINX64-NEXT: sw t2, 8(sp) ; ZDINX64-NEXT: sw s0, 12(sp) +; ZDINX64-NEXT: mv a0, a4 ; ZDINX64-NEXT: mv a1, a5 ; ZDINX64-NEXT: mv a2, a6 ; ZDINX64-NEXT: mv a3, a7 diff --git a/llvm/test/CodeGen/RISCV/float-convert.ll b/llvm/test/CodeGen/RISCV/float-convert.ll index fc866d71a3a70..89858af3282d6 100644 --- a/llvm/test/CodeGen/RISCV/float-convert.ll +++ b/llvm/test/CodeGen/RISCV/float-convert.ll @@ -1417,13 +1417,13 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV32IF-LABEL: fcvt_w_s_sat_i16: ; RV32IF: # %bb.0: # %start ; RV32IF-NEXT: feq.s a0, fa0, 
fa0 -; RV32IF-NEXT: lui a1, %hi(.LCPI24_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI24_0)(a1) ; RV32IF-NEXT: lui a1, 815104 -; RV32IF-NEXT: fmv.w.x fa4, a1 -; RV32IF-NEXT: fmax.s fa4, fa0, fa4 +; RV32IF-NEXT: fmv.w.x fa5, a1 +; RV32IF-NEXT: lui a1, %hi(.LCPI24_0) ; RV32IF-NEXT: neg a0, a0 -; RV32IF-NEXT: fmin.s fa5, fa4, fa5 +; RV32IF-NEXT: fmax.s fa5, fa0, fa5 +; RV32IF-NEXT: flw fa4, %lo(.LCPI24_0)(a1) +; RV32IF-NEXT: fmin.s fa5, fa5, fa4 ; RV32IF-NEXT: fcvt.w.s a1, fa5, rtz ; RV32IF-NEXT: and a0, a0, a1 ; RV32IF-NEXT: ret @@ -1431,13 +1431,13 @@ define signext i16 @fcvt_w_s_sat_i16(float %a) nounwind { ; RV64IF-LABEL: fcvt_w_s_sat_i16: ; RV64IF: # %bb.0: # %start ; RV64IF-NEXT: feq.s a0, fa0, fa0 -; RV64IF-NEXT: lui a1, %hi(.LCPI24_0) -; RV64IF-NEXT: flw fa5, %lo(.LCPI24_0)(a1) ; RV64IF-NEXT: lui a1, 815104 -; RV64IF-NEXT: fmv.w.x fa4, a1 -; RV64IF-NEXT: fmax.s fa4, fa0, fa4 +; RV64IF-NEXT: fmv.w.x fa5, a1 +; RV64IF-NEXT: lui a1, %hi(.LCPI24_0) ; RV64IF-NEXT: neg a0, a0 -; RV64IF-NEXT: fmin.s fa5, fa4, fa5 +; RV64IF-NEXT: fmax.s fa5, fa0, fa5 +; RV64IF-NEXT: flw fa4, %lo(.LCPI24_0)(a1) +; RV64IF-NEXT: fmin.s fa5, fa5, fa4 ; RV64IF-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IF-NEXT: and a0, a0, a1 ; RV64IF-NEXT: ret @@ -1602,21 +1602,21 @@ define zeroext i16 @fcvt_wu_s_i16(float %a) nounwind { define zeroext i16 @fcvt_wu_s_sat_i16(float %a) nounwind { ; RV32IF-LABEL: fcvt_wu_s_sat_i16: ; RV32IF: # %bb.0: # %start +; RV32IF-NEXT: fmv.w.x fa5, zero ; RV32IF-NEXT: lui a0, %hi(.LCPI26_0) -; RV32IF-NEXT: flw fa5, %lo(.LCPI26_0)(a0) -; RV32IF-NEXT: fmv.w.x fa4, zero -; RV32IF-NEXT: fmax.s fa4, fa0, fa4 -; RV32IF-NEXT: fmin.s fa5, fa4, fa5 +; RV32IF-NEXT: fmax.s fa5, fa0, fa5 +; RV32IF-NEXT: flw fa4, %lo(.LCPI26_0)(a0) +; RV32IF-NEXT: fmin.s fa5, fa5, fa4 ; RV32IF-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32IF-NEXT: ret ; ; RV64IF-LABEL: fcvt_wu_s_sat_i16: ; RV64IF: # %bb.0: # %start +; RV64IF-NEXT: fmv.w.x fa5, zero ; RV64IF-NEXT: lui a0, %hi(.LCPI26_0) -; RV64IF-NEXT: flw fa5, %lo(.LCPI26_0)(a0) -; RV64IF-NEXT: fmv.w.x fa4, zero -; RV64IF-NEXT: fmax.s fa4, fa0, fa4 -; RV64IF-NEXT: fmin.s fa5, fa4, fa5 +; RV64IF-NEXT: fmax.s fa5, fa0, fa5 +; RV64IF-NEXT: flw fa4, %lo(.LCPI26_0)(a0) +; RV64IF-NEXT: fmin.s fa5, fa5, fa4 ; RV64IF-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64IF-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll index 0cbfc96bf485e..9b3a643e59e68 100644 --- a/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll +++ b/llvm/test/CodeGen/RISCV/float-fcmp-strict.ll @@ -234,8 +234,8 @@ define i32 @fcmp_one(float %a, float %b) nounwind strictfp { ; CHECKIF-NEXT: frflags a0 ; CHECKIF-NEXT: flt.s a2, fa1, fa0 ; CHECKIF-NEXT: fsflags a0 -; CHECKIF-NEXT: or a0, a2, a1 ; CHECKIF-NEXT: feq.s zero, fa1, fa0 +; CHECKIF-NEXT: or a0, a2, a1 ; CHECKIF-NEXT: ret ; ; CHECKIZFINX-LABEL: fcmp_one: @@ -247,9 +247,8 @@ define i32 @fcmp_one(float %a, float %b) nounwind strictfp { ; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: flt.s a4, a1, a0 ; CHECKIZFINX-NEXT: fsflags a2 -; CHECKIZFINX-NEXT: or a2, a4, a3 ; CHECKIZFINX-NEXT: feq.s zero, a1, a0 -; CHECKIZFINX-NEXT: mv a0, a2 +; CHECKIZFINX-NEXT: or a0, a4, a3 ; CHECKIZFINX-NEXT: ret ; ; RV32I-LABEL: fcmp_one: @@ -353,9 +352,9 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp { ; CHECKIF-NEXT: frflags a0 ; CHECKIF-NEXT: flt.s a2, fa1, fa0 ; CHECKIF-NEXT: fsflags a0 +; CHECKIF-NEXT: feq.s zero, fa1, fa0 ; CHECKIF-NEXT: or a1, a2, a1 ; CHECKIF-NEXT: xori a0, a1, 1 -; CHECKIF-NEXT: feq.s zero, fa1, fa0 ; CHECKIF-NEXT: 
ret ; ; CHECKIZFINX-LABEL: fcmp_ueq: @@ -367,10 +366,9 @@ define i32 @fcmp_ueq(float %a, float %b) nounwind strictfp { ; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: flt.s a4, a1, a0 ; CHECKIZFINX-NEXT: fsflags a2 -; CHECKIZFINX-NEXT: or a3, a4, a3 -; CHECKIZFINX-NEXT: xori a2, a3, 1 ; CHECKIZFINX-NEXT: feq.s zero, a1, a0 -; CHECKIZFINX-NEXT: mv a0, a2 +; CHECKIZFINX-NEXT: or a3, a4, a3 +; CHECKIZFINX-NEXT: xori a0, a3, 1 ; CHECKIZFINX-NEXT: ret ; ; RV32I-LABEL: fcmp_ueq: @@ -429,8 +427,8 @@ define i32 @fcmp_ugt(float %a, float %b) nounwind strictfp { ; CHECKIF-NEXT: frflags a0 ; CHECKIF-NEXT: fle.s a1, fa0, fa1 ; CHECKIF-NEXT: fsflags a0 -; CHECKIF-NEXT: xori a0, a1, 1 ; CHECKIF-NEXT: feq.s zero, fa0, fa1 +; CHECKIF-NEXT: xori a0, a1, 1 ; CHECKIF-NEXT: ret ; ; CHECKIZFINX-LABEL: fcmp_ugt: @@ -438,9 +436,8 @@ define i32 @fcmp_ugt(float %a, float %b) nounwind strictfp { ; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: fle.s a3, a0, a1 ; CHECKIZFINX-NEXT: fsflags a2 -; CHECKIZFINX-NEXT: xori a2, a3, 1 ; CHECKIZFINX-NEXT: feq.s zero, a0, a1 -; CHECKIZFINX-NEXT: mv a0, a2 +; CHECKIZFINX-NEXT: xori a0, a3, 1 ; CHECKIZFINX-NEXT: ret ; ; RV32I-LABEL: fcmp_ugt: @@ -473,8 +470,8 @@ define i32 @fcmp_uge(float %a, float %b) nounwind strictfp { ; CHECKIF-NEXT: frflags a0 ; CHECKIF-NEXT: flt.s a1, fa0, fa1 ; CHECKIF-NEXT: fsflags a0 -; CHECKIF-NEXT: xori a0, a1, 1 ; CHECKIF-NEXT: feq.s zero, fa0, fa1 +; CHECKIF-NEXT: xori a0, a1, 1 ; CHECKIF-NEXT: ret ; ; CHECKIZFINX-LABEL: fcmp_uge: @@ -482,9 +479,8 @@ define i32 @fcmp_uge(float %a, float %b) nounwind strictfp { ; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: flt.s a3, a0, a1 ; CHECKIZFINX-NEXT: fsflags a2 -; CHECKIZFINX-NEXT: xori a2, a3, 1 ; CHECKIZFINX-NEXT: feq.s zero, a0, a1 -; CHECKIZFINX-NEXT: mv a0, a2 +; CHECKIZFINX-NEXT: xori a0, a3, 1 ; CHECKIZFINX-NEXT: ret ; ; RV32I-LABEL: fcmp_uge: @@ -519,8 +515,8 @@ define i32 @fcmp_ult(float %a, float %b) nounwind strictfp { ; CHECKIF-NEXT: frflags a0 ; CHECKIF-NEXT: fle.s a1, fa1, fa0 ; CHECKIF-NEXT: fsflags a0 -; CHECKIF-NEXT: xori a0, a1, 1 ; CHECKIF-NEXT: feq.s zero, fa1, fa0 +; CHECKIF-NEXT: xori a0, a1, 1 ; CHECKIF-NEXT: ret ; ; CHECKIZFINX-LABEL: fcmp_ult: @@ -528,9 +524,8 @@ define i32 @fcmp_ult(float %a, float %b) nounwind strictfp { ; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: fle.s a3, a1, a0 ; CHECKIZFINX-NEXT: fsflags a2 -; CHECKIZFINX-NEXT: xori a2, a3, 1 ; CHECKIZFINX-NEXT: feq.s zero, a1, a0 -; CHECKIZFINX-NEXT: mv a0, a2 +; CHECKIZFINX-NEXT: xori a0, a3, 1 ; CHECKIZFINX-NEXT: ret ; ; RV32I-LABEL: fcmp_ult: @@ -563,8 +558,8 @@ define i32 @fcmp_ule(float %a, float %b) nounwind strictfp { ; CHECKIF-NEXT: frflags a0 ; CHECKIF-NEXT: flt.s a1, fa1, fa0 ; CHECKIF-NEXT: fsflags a0 -; CHECKIF-NEXT: xori a0, a1, 1 ; CHECKIF-NEXT: feq.s zero, fa1, fa0 +; CHECKIF-NEXT: xori a0, a1, 1 ; CHECKIF-NEXT: ret ; ; CHECKIZFINX-LABEL: fcmp_ule: @@ -572,9 +567,8 @@ define i32 @fcmp_ule(float %a, float %b) nounwind strictfp { ; CHECKIZFINX-NEXT: frflags a2 ; CHECKIZFINX-NEXT: flt.s a3, a1, a0 ; CHECKIZFINX-NEXT: fsflags a2 -; CHECKIZFINX-NEXT: xori a2, a3, 1 ; CHECKIZFINX-NEXT: feq.s zero, a1, a0 -; CHECKIZFINX-NEXT: mv a0, a2 +; CHECKIZFINX-NEXT: xori a0, a3, 1 ; CHECKIZFINX-NEXT: ret ; ; RV32I-LABEL: fcmp_ule: diff --git a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll index a2ff0d33e2d31..5ec0335972394 100644 --- a/llvm/test/CodeGen/RISCV/float-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/float-select-fcmp.ll @@ -387,12 +387,12 @@ define i32 
@i32_select_fcmp_oeq(float %a, float %b, i32 %c, i32 %d) nounwind { ; ; CHECKZFINX-LABEL: i32_select_fcmp_oeq: ; CHECKZFINX: # %bb.0: -; CHECKZFINX-NEXT: feq.s a1, a0, a1 -; CHECKZFINX-NEXT: mv a0, a2 -; CHECKZFINX-NEXT: bnez a1, .LBB16_2 +; CHECKZFINX-NEXT: feq.s a0, a0, a1 +; CHECKZFINX-NEXT: bnez a0, .LBB16_2 ; CHECKZFINX-NEXT: # %bb.1: -; CHECKZFINX-NEXT: mv a0, a3 +; CHECKZFINX-NEXT: mv a2, a3 ; CHECKZFINX-NEXT: .LBB16_2: +; CHECKZFINX-NEXT: mv a0, a2 ; CHECKZFINX-NEXT: ret %1 = fcmp oeq float %a, %b %2 = select i1 %1, i32 %c, i32 %d diff --git a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll index b8dc7804c4908..59ba3652c89e9 100644 --- a/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll +++ b/llvm/test/CodeGen/RISCV/fold-addi-loadstore.ll @@ -929,19 +929,19 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind { ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: mv a0, s0 ; RV32I-NEXT: call f +; RV32I-NEXT: addi s5, s5, 1 +; RV32I-NEXT: seqz a0, s5 +; RV32I-NEXT: add s6, s6, a0 ; RV32I-NEXT: lw a0, 8(s7) ; RV32I-NEXT: lw a1, 12(s7) -; RV32I-NEXT: addi s5, s5, 1 -; RV32I-NEXT: seqz a2, s5 -; RV32I-NEXT: add s6, s6, a2 -; RV32I-NEXT: xor a2, s5, s2 ; RV32I-NEXT: add a1, a1, s4 -; RV32I-NEXT: xor a3, s6, s1 -; RV32I-NEXT: or a2, a2, a3 +; RV32I-NEXT: xor a2, s5, s2 ; RV32I-NEXT: add s3, a0, s3 ; RV32I-NEXT: sltu s4, s3, a0 ; RV32I-NEXT: add s4, a1, s4 -; RV32I-NEXT: bnez a2, .LBB20_5 +; RV32I-NEXT: xor a0, s6, s1 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: bnez a0, .LBB20_5 ; RV32I-NEXT: .LBB20_6: # %for.cond.cleanup ; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: mv a1, s4 @@ -994,19 +994,19 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind { ; RV32I-MEDIUM-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-MEDIUM-NEXT: mv a0, s0 ; RV32I-MEDIUM-NEXT: call f +; RV32I-MEDIUM-NEXT: addi s5, s5, 1 +; RV32I-MEDIUM-NEXT: seqz a0, s5 +; RV32I-MEDIUM-NEXT: add s6, s6, a0 ; RV32I-MEDIUM-NEXT: lw a0, 8(s7) ; RV32I-MEDIUM-NEXT: lw a1, 12(s7) -; RV32I-MEDIUM-NEXT: addi s5, s5, 1 -; RV32I-MEDIUM-NEXT: seqz a2, s5 -; RV32I-MEDIUM-NEXT: add s6, s6, a2 -; RV32I-MEDIUM-NEXT: xor a2, s5, s2 ; RV32I-MEDIUM-NEXT: add a1, a1, s4 -; RV32I-MEDIUM-NEXT: xor a3, s6, s1 -; RV32I-MEDIUM-NEXT: or a2, a2, a3 +; RV32I-MEDIUM-NEXT: xor a2, s5, s2 ; RV32I-MEDIUM-NEXT: add s3, a0, s3 ; RV32I-MEDIUM-NEXT: sltu s4, s3, a0 ; RV32I-MEDIUM-NEXT: add s4, a1, s4 -; RV32I-MEDIUM-NEXT: bnez a2, .LBB20_5 +; RV32I-MEDIUM-NEXT: xor a0, s6, s1 +; RV32I-MEDIUM-NEXT: or a0, a2, a0 +; RV32I-MEDIUM-NEXT: bnez a0, .LBB20_5 ; RV32I-MEDIUM-NEXT: .LBB20_6: # %for.cond.cleanup ; RV32I-MEDIUM-NEXT: mv a0, s3 ; RV32I-MEDIUM-NEXT: mv a1, s4 @@ -1042,8 +1042,8 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind { ; RV64I-NEXT: mv a0, s0 ; RV64I-NEXT: call f ; RV64I-NEXT: ld a0, 8(s3) -; RV64I-NEXT: addi s1, s1, -1 ; RV64I-NEXT: add s2, a0, s2 +; RV64I-NEXT: addi s1, s1, -1 ; RV64I-NEXT: bnez s1, .LBB20_2 ; RV64I-NEXT: j .LBB20_4 ; RV64I-NEXT: .LBB20_3: @@ -1078,8 +1078,8 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 %n, ptr %a) nounwind { ; RV64I-MEDIUM-NEXT: mv a0, s0 ; RV64I-MEDIUM-NEXT: call f ; RV64I-MEDIUM-NEXT: ld a0, 8(s3) -; RV64I-MEDIUM-NEXT: addi s1, s1, -1 ; RV64I-MEDIUM-NEXT: add s2, a0, s2 +; RV64I-MEDIUM-NEXT: addi s1, s1, -1 ; RV64I-MEDIUM-NEXT: bnez s1, .LBB20_2 ; RV64I-MEDIUM-NEXT: j .LBB20_4 ; RV64I-MEDIUM-NEXT: .LBB20_3: @@ -1108,18 +1108,18 @@ define i64 @fold_addi_from_different_bb(i64 %k, i64 
%n, ptr %a) nounwind { ; RV64I-LARGE-NEXT: mv s0, a2 ; RV64I-LARGE-NEXT: mv s1, a1 ; RV64I-LARGE-NEXT: li s2, 0 +; RV64I-LARGE-NEXT: slli a0, a0, 4 ; RV64I-LARGE-NEXT: .Lpcrel_hi14: ; RV64I-LARGE-NEXT: auipc a1, %pcrel_hi(.LCPI20_0) -; RV64I-LARGE-NEXT: ld s3, %pcrel_lo(.Lpcrel_hi14)(a1) -; RV64I-LARGE-NEXT: slli a0, a0, 4 -; RV64I-LARGE-NEXT: add s4, a2, a0 +; RV64I-LARGE-NEXT: add s3, a2, a0 +; RV64I-LARGE-NEXT: ld s4, %pcrel_lo(.Lpcrel_hi14)(a1) ; RV64I-LARGE-NEXT: .LBB20_2: # %for.body ; RV64I-LARGE-NEXT: # =>This Inner Loop Header: Depth=1 ; RV64I-LARGE-NEXT: mv a0, s0 -; RV64I-LARGE-NEXT: jalr s3 -; RV64I-LARGE-NEXT: ld a0, 8(s4) -; RV64I-LARGE-NEXT: addi s1, s1, -1 +; RV64I-LARGE-NEXT: jalr s4 +; RV64I-LARGE-NEXT: ld a0, 8(s3) ; RV64I-LARGE-NEXT: add s2, a0, s2 +; RV64I-LARGE-NEXT: addi s1, s1, -1 ; RV64I-LARGE-NEXT: bnez s1, .LBB20_2 ; RV64I-LARGE-NEXT: j .LBB20_4 ; RV64I-LARGE-NEXT: .LBB20_3: diff --git a/llvm/test/CodeGen/RISCV/forced-atomics.ll b/llvm/test/CodeGen/RISCV/forced-atomics.ll index e7719dc70660b..3ea9c4c6ad754 100644 --- a/llvm/test/CodeGen/RISCV/forced-atomics.ll +++ b/llvm/test/CodeGen/RISCV/forced-atomics.ll @@ -1357,28 +1357,28 @@ define i32 @rmw32_max_seq_cst(ptr %p) nounwind { ; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NO-ATOMIC-NEXT: mv s0, a0 -; RV32-NO-ATOMIC-NEXT: lw a1, 0(a0) +; RV32-NO-ATOMIC-NEXT: lw a0, 0(a0) ; RV32-NO-ATOMIC-NEXT: j .LBB23_2 ; RV32-NO-ATOMIC-NEXT: .LBB23_1: # %atomicrmw.start ; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB23_2 Depth=1 -; RV32-NO-ATOMIC-NEXT: sw a1, 4(sp) +; RV32-NO-ATOMIC-NEXT: sw a0, 4(sp) ; RV32-NO-ATOMIC-NEXT: addi a1, sp, 4 ; RV32-NO-ATOMIC-NEXT: li a3, 5 ; RV32-NO-ATOMIC-NEXT: li a4, 5 ; RV32-NO-ATOMIC-NEXT: mv a0, s0 ; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4 -; RV32-NO-ATOMIC-NEXT: lw a1, 4(sp) -; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB23_4 +; RV32-NO-ATOMIC-NEXT: mv a1, a0 +; RV32-NO-ATOMIC-NEXT: lw a0, 4(sp) +; RV32-NO-ATOMIC-NEXT: bnez a1, .LBB23_4 ; RV32-NO-ATOMIC-NEXT: .LBB23_2: # %atomicrmw.start ; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NO-ATOMIC-NEXT: mv a2, a1 -; RV32-NO-ATOMIC-NEXT: bgtz a1, .LBB23_1 +; RV32-NO-ATOMIC-NEXT: mv a2, a0 +; RV32-NO-ATOMIC-NEXT: bgtz a0, .LBB23_1 ; RV32-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB23_2 Depth=1 ; RV32-NO-ATOMIC-NEXT: li a2, 1 ; RV32-NO-ATOMIC-NEXT: j .LBB23_1 ; RV32-NO-ATOMIC-NEXT: .LBB23_4: # %atomicrmw.end -; RV32-NO-ATOMIC-NEXT: mv a0, a1 ; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16 @@ -1410,29 +1410,29 @@ define i32 @rmw32_max_seq_cst(ptr %p) nounwind { ; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: mv s0, a0 -; RV64-NO-ATOMIC-NEXT: lw a1, 0(a0) +; RV64-NO-ATOMIC-NEXT: lw a0, 0(a0) ; RV64-NO-ATOMIC-NEXT: j .LBB23_2 ; RV64-NO-ATOMIC-NEXT: .LBB23_1: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB23_2 Depth=1 -; RV64-NO-ATOMIC-NEXT: sw a1, 12(sp) +; RV64-NO-ATOMIC-NEXT: sw a0, 12(sp) ; RV64-NO-ATOMIC-NEXT: addi a1, sp, 12 ; RV64-NO-ATOMIC-NEXT: li a3, 5 ; RV64-NO-ATOMIC-NEXT: li a4, 5 ; RV64-NO-ATOMIC-NEXT: mv a0, s0 ; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4 -; RV64-NO-ATOMIC-NEXT: lw a1, 12(sp) -; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB23_4 +; RV64-NO-ATOMIC-NEXT: mv a1, a0 +; 
RV64-NO-ATOMIC-NEXT: lw a0, 12(sp) +; RV64-NO-ATOMIC-NEXT: bnez a1, .LBB23_4 ; RV64-NO-ATOMIC-NEXT: .LBB23_2: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64-NO-ATOMIC-NEXT: li a0, 1 -; RV64-NO-ATOMIC-NEXT: mv a2, a1 -; RV64-NO-ATOMIC-NEXT: blt a0, a1, .LBB23_1 +; RV64-NO-ATOMIC-NEXT: li a1, 1 +; RV64-NO-ATOMIC-NEXT: mv a2, a0 +; RV64-NO-ATOMIC-NEXT: blt a1, a0, .LBB23_1 ; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB23_2 Depth=1 ; RV64-NO-ATOMIC-NEXT: li a2, 1 ; RV64-NO-ATOMIC-NEXT: j .LBB23_1 ; RV64-NO-ATOMIC-NEXT: .LBB23_4: # %atomicrmw.end -; RV64-NO-ATOMIC-NEXT: mv a0, a1 ; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32 @@ -1469,29 +1469,29 @@ define i32 @rmw32_min_seq_cst(ptr %p) nounwind { ; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32-NO-ATOMIC-NEXT: mv s0, a0 -; RV32-NO-ATOMIC-NEXT: lw a1, 0(a0) +; RV32-NO-ATOMIC-NEXT: lw a0, 0(a0) ; RV32-NO-ATOMIC-NEXT: li s1, 2 ; RV32-NO-ATOMIC-NEXT: j .LBB24_2 ; RV32-NO-ATOMIC-NEXT: .LBB24_1: # %atomicrmw.start ; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB24_2 Depth=1 -; RV32-NO-ATOMIC-NEXT: sw a1, 0(sp) +; RV32-NO-ATOMIC-NEXT: sw a0, 0(sp) ; RV32-NO-ATOMIC-NEXT: mv a1, sp ; RV32-NO-ATOMIC-NEXT: li a3, 5 ; RV32-NO-ATOMIC-NEXT: li a4, 5 ; RV32-NO-ATOMIC-NEXT: mv a0, s0 ; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4 -; RV32-NO-ATOMIC-NEXT: lw a1, 0(sp) -; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB24_4 +; RV32-NO-ATOMIC-NEXT: mv a1, a0 +; RV32-NO-ATOMIC-NEXT: lw a0, 0(sp) +; RV32-NO-ATOMIC-NEXT: bnez a1, .LBB24_4 ; RV32-NO-ATOMIC-NEXT: .LBB24_2: # %atomicrmw.start ; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NO-ATOMIC-NEXT: mv a2, a1 -; RV32-NO-ATOMIC-NEXT: blt a1, s1, .LBB24_1 +; RV32-NO-ATOMIC-NEXT: mv a2, a0 +; RV32-NO-ATOMIC-NEXT: blt a0, s1, .LBB24_1 ; RV32-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB24_2 Depth=1 ; RV32-NO-ATOMIC-NEXT: li a2, 1 ; RV32-NO-ATOMIC-NEXT: j .LBB24_1 ; RV32-NO-ATOMIC-NEXT: .LBB24_4: # %atomicrmw.end -; RV32-NO-ATOMIC-NEXT: mv a0, a1 ; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -1525,29 +1525,29 @@ define i32 @rmw32_min_seq_cst(ptr %p) nounwind { ; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: mv s0, a0 -; RV64-NO-ATOMIC-NEXT: lw a1, 0(a0) +; RV64-NO-ATOMIC-NEXT: lw a0, 0(a0) ; RV64-NO-ATOMIC-NEXT: li s1, 2 ; RV64-NO-ATOMIC-NEXT: j .LBB24_2 ; RV64-NO-ATOMIC-NEXT: .LBB24_1: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB24_2 Depth=1 -; RV64-NO-ATOMIC-NEXT: sw a1, 4(sp) +; RV64-NO-ATOMIC-NEXT: sw a0, 4(sp) ; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4 ; RV64-NO-ATOMIC-NEXT: li a3, 5 ; RV64-NO-ATOMIC-NEXT: li a4, 5 ; RV64-NO-ATOMIC-NEXT: mv a0, s0 ; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4 -; RV64-NO-ATOMIC-NEXT: lw a1, 4(sp) -; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB24_4 +; RV64-NO-ATOMIC-NEXT: mv a1, a0 +; RV64-NO-ATOMIC-NEXT: lw a0, 4(sp) +; RV64-NO-ATOMIC-NEXT: bnez a1, .LBB24_4 ; RV64-NO-ATOMIC-NEXT: .LBB24_2: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64-NO-ATOMIC-NEXT: mv a2, a1 -; RV64-NO-ATOMIC-NEXT: blt 
a1, s1, .LBB24_1 +; RV64-NO-ATOMIC-NEXT: mv a2, a0 +; RV64-NO-ATOMIC-NEXT: blt a0, s1, .LBB24_1 ; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB24_2 Depth=1 ; RV64-NO-ATOMIC-NEXT: li a2, 1 ; RV64-NO-ATOMIC-NEXT: j .LBB24_1 ; RV64-NO-ATOMIC-NEXT: .LBB24_4: # %atomicrmw.end -; RV64-NO-ATOMIC-NEXT: mv a0, a1 ; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -1584,21 +1584,21 @@ define i32 @rmw32_umax_seq_cst(ptr %p) nounwind { ; RV32-NO-ATOMIC-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NO-ATOMIC-NEXT: mv s0, a0 -; RV32-NO-ATOMIC-NEXT: lw a1, 0(a0) +; RV32-NO-ATOMIC-NEXT: lw a0, 0(a0) ; RV32-NO-ATOMIC-NEXT: .LBB25_1: # %atomicrmw.start ; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NO-ATOMIC-NEXT: seqz a2, a1 -; RV32-NO-ATOMIC-NEXT: add a2, a1, a2 -; RV32-NO-ATOMIC-NEXT: sw a1, 4(sp) +; RV32-NO-ATOMIC-NEXT: seqz a2, a0 +; RV32-NO-ATOMIC-NEXT: add a2, a0, a2 +; RV32-NO-ATOMIC-NEXT: sw a0, 4(sp) ; RV32-NO-ATOMIC-NEXT: addi a1, sp, 4 ; RV32-NO-ATOMIC-NEXT: li a3, 5 ; RV32-NO-ATOMIC-NEXT: li a4, 5 ; RV32-NO-ATOMIC-NEXT: mv a0, s0 ; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4 -; RV32-NO-ATOMIC-NEXT: lw a1, 4(sp) -; RV32-NO-ATOMIC-NEXT: beqz a0, .LBB25_1 +; RV32-NO-ATOMIC-NEXT: mv a1, a0 +; RV32-NO-ATOMIC-NEXT: lw a0, 4(sp) +; RV32-NO-ATOMIC-NEXT: beqz a1, .LBB25_1 ; RV32-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end -; RV32-NO-ATOMIC-NEXT: mv a0, a1 ; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32-NO-ATOMIC-NEXT: addi sp, sp, 16 @@ -1630,29 +1630,29 @@ define i32 @rmw32_umax_seq_cst(ptr %p) nounwind { ; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: mv s0, a0 -; RV64-NO-ATOMIC-NEXT: lw a1, 0(a0) +; RV64-NO-ATOMIC-NEXT: lw a0, 0(a0) ; RV64-NO-ATOMIC-NEXT: j .LBB25_2 ; RV64-NO-ATOMIC-NEXT: .LBB25_1: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB25_2 Depth=1 -; RV64-NO-ATOMIC-NEXT: sw a1, 12(sp) +; RV64-NO-ATOMIC-NEXT: sw a0, 12(sp) ; RV64-NO-ATOMIC-NEXT: addi a1, sp, 12 ; RV64-NO-ATOMIC-NEXT: li a3, 5 ; RV64-NO-ATOMIC-NEXT: li a4, 5 ; RV64-NO-ATOMIC-NEXT: mv a0, s0 ; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4 -; RV64-NO-ATOMIC-NEXT: lw a1, 12(sp) -; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB25_4 +; RV64-NO-ATOMIC-NEXT: mv a1, a0 +; RV64-NO-ATOMIC-NEXT: lw a0, 12(sp) +; RV64-NO-ATOMIC-NEXT: bnez a1, .LBB25_4 ; RV64-NO-ATOMIC-NEXT: .LBB25_2: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64-NO-ATOMIC-NEXT: li a0, 1 -; RV64-NO-ATOMIC-NEXT: mv a2, a1 -; RV64-NO-ATOMIC-NEXT: bltu a0, a1, .LBB25_1 +; RV64-NO-ATOMIC-NEXT: li a1, 1 +; RV64-NO-ATOMIC-NEXT: mv a2, a0 +; RV64-NO-ATOMIC-NEXT: bltu a1, a0, .LBB25_1 ; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB25_2 Depth=1 ; RV64-NO-ATOMIC-NEXT: li a2, 1 ; RV64-NO-ATOMIC-NEXT: j .LBB25_1 ; RV64-NO-ATOMIC-NEXT: .LBB25_4: # %atomicrmw.end -; RV64-NO-ATOMIC-NEXT: mv a0, a1 ; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32 @@ -1689,29 +1689,29 @@ define i32 @rmw32_umin_seq_cst(ptr %p) nounwind { ; RV32-NO-ATOMIC-NEXT: sw s0, 8(sp) 
# 4-byte Folded Spill ; RV32-NO-ATOMIC-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32-NO-ATOMIC-NEXT: mv s0, a0 -; RV32-NO-ATOMIC-NEXT: lw a1, 0(a0) +; RV32-NO-ATOMIC-NEXT: lw a0, 0(a0) ; RV32-NO-ATOMIC-NEXT: li s1, 2 ; RV32-NO-ATOMIC-NEXT: j .LBB26_2 ; RV32-NO-ATOMIC-NEXT: .LBB26_1: # %atomicrmw.start ; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB26_2 Depth=1 -; RV32-NO-ATOMIC-NEXT: sw a1, 0(sp) +; RV32-NO-ATOMIC-NEXT: sw a0, 0(sp) ; RV32-NO-ATOMIC-NEXT: mv a1, sp ; RV32-NO-ATOMIC-NEXT: li a3, 5 ; RV32-NO-ATOMIC-NEXT: li a4, 5 ; RV32-NO-ATOMIC-NEXT: mv a0, s0 ; RV32-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4 -; RV32-NO-ATOMIC-NEXT: lw a1, 0(sp) -; RV32-NO-ATOMIC-NEXT: bnez a0, .LBB26_4 +; RV32-NO-ATOMIC-NEXT: mv a1, a0 +; RV32-NO-ATOMIC-NEXT: lw a0, 0(sp) +; RV32-NO-ATOMIC-NEXT: bnez a1, .LBB26_4 ; RV32-NO-ATOMIC-NEXT: .LBB26_2: # %atomicrmw.start ; RV32-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NO-ATOMIC-NEXT: mv a2, a1 -; RV32-NO-ATOMIC-NEXT: bltu a1, s1, .LBB26_1 +; RV32-NO-ATOMIC-NEXT: mv a2, a0 +; RV32-NO-ATOMIC-NEXT: bltu a0, s1, .LBB26_1 ; RV32-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NO-ATOMIC-NEXT: # in Loop: Header=BB26_2 Depth=1 ; RV32-NO-ATOMIC-NEXT: li a2, 1 ; RV32-NO-ATOMIC-NEXT: j .LBB26_1 ; RV32-NO-ATOMIC-NEXT: .LBB26_4: # %atomicrmw.end -; RV32-NO-ATOMIC-NEXT: mv a0, a1 ; RV32-NO-ATOMIC-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NO-ATOMIC-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32-NO-ATOMIC-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -1745,29 +1745,29 @@ define i32 @rmw32_umin_seq_cst(ptr %p) nounwind { ; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: mv s0, a0 -; RV64-NO-ATOMIC-NEXT: lw a1, 0(a0) +; RV64-NO-ATOMIC-NEXT: lw a0, 0(a0) ; RV64-NO-ATOMIC-NEXT: li s1, 2 ; RV64-NO-ATOMIC-NEXT: j .LBB26_2 ; RV64-NO-ATOMIC-NEXT: .LBB26_1: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB26_2 Depth=1 -; RV64-NO-ATOMIC-NEXT: sw a1, 4(sp) +; RV64-NO-ATOMIC-NEXT: sw a0, 4(sp) ; RV64-NO-ATOMIC-NEXT: addi a1, sp, 4 ; RV64-NO-ATOMIC-NEXT: li a3, 5 ; RV64-NO-ATOMIC-NEXT: li a4, 5 ; RV64-NO-ATOMIC-NEXT: mv a0, s0 ; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_4 -; RV64-NO-ATOMIC-NEXT: lw a1, 4(sp) -; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB26_4 +; RV64-NO-ATOMIC-NEXT: mv a1, a0 +; RV64-NO-ATOMIC-NEXT: lw a0, 4(sp) +; RV64-NO-ATOMIC-NEXT: bnez a1, .LBB26_4 ; RV64-NO-ATOMIC-NEXT: .LBB26_2: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64-NO-ATOMIC-NEXT: mv a2, a1 -; RV64-NO-ATOMIC-NEXT: bltu a1, s1, .LBB26_1 +; RV64-NO-ATOMIC-NEXT: mv a2, a0 +; RV64-NO-ATOMIC-NEXT: bltu a0, s1, .LBB26_1 ; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB26_2 Depth=1 ; RV64-NO-ATOMIC-NEXT: li a2, 1 ; RV64-NO-ATOMIC-NEXT: j .LBB26_1 ; RV64-NO-ATOMIC-NEXT: .LBB26_4: # %atomicrmw.end -; RV64-NO-ATOMIC-NEXT: mv a0, a1 ; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3348,43 +3348,43 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind { ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a1, 4(a0) +; RV32-NEXT: lw a0, 0(a0) +; RV32-NEXT: lw a1, 4(s0) ; RV32-NEXT: j .LBB49_2 ; RV32-NEXT: .LBB49_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: 
Header=BB49_2 Depth=1 -; RV32-NEXT: neg a3, a0 +; RV32-NEXT: neg a3, a3 ; RV32-NEXT: and a3, a3, a1 -; RV32-NEXT: sw a4, 0(sp) +; RV32-NEXT: sw a0, 0(sp) ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: mv a1, sp ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 0(sp) +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: lw a0, 0(sp) ; RV32-NEXT: lw a1, 4(sp) -; RV32-NEXT: bnez a0, .LBB49_6 +; RV32-NEXT: bnez a2, .LBB49_6 ; RV32-NEXT: .LBB49_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: beqz a1, .LBB49_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1 -; RV32-NEXT: sgtz a0, a1 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: bnez a0, .LBB49_1 +; RV32-NEXT: sgtz a3, a1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bnez a3, .LBB49_1 ; RV32-NEXT: j .LBB49_5 ; RV32-NEXT: .LBB49_4: # in Loop: Header=BB49_2 Depth=1 -; RV32-NEXT: sltiu a0, a4, 2 -; RV32-NEXT: xori a0, a0, 1 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: bnez a0, .LBB49_1 +; RV32-NEXT: sltiu a2, a0, 2 +; RV32-NEXT: xori a3, a2, 1 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bnez a3, .LBB49_1 ; RV32-NEXT: .LBB49_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB49_2 Depth=1 ; RV32-NEXT: li a2, 1 ; RV32-NEXT: j .LBB49_1 ; RV32-NEXT: .LBB49_6: # %atomicrmw.end -; RV32-NEXT: mv a0, a4 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 @@ -3396,28 +3396,28 @@ define i64 @rmw64_max_seq_cst(ptr %p) nounwind { ; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: mv s0, a0 -; RV64-NO-ATOMIC-NEXT: ld a1, 0(a0) +; RV64-NO-ATOMIC-NEXT: ld a0, 0(a0) ; RV64-NO-ATOMIC-NEXT: j .LBB49_2 ; RV64-NO-ATOMIC-NEXT: .LBB49_1: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB49_2 Depth=1 -; RV64-NO-ATOMIC-NEXT: sd a1, 8(sp) +; RV64-NO-ATOMIC-NEXT: sd a0, 8(sp) ; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8 ; RV64-NO-ATOMIC-NEXT: li a3, 5 ; RV64-NO-ATOMIC-NEXT: li a4, 5 ; RV64-NO-ATOMIC-NEXT: mv a0, s0 ; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8 -; RV64-NO-ATOMIC-NEXT: ld a1, 8(sp) -; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB49_4 +; RV64-NO-ATOMIC-NEXT: mv a1, a0 +; RV64-NO-ATOMIC-NEXT: ld a0, 8(sp) +; RV64-NO-ATOMIC-NEXT: bnez a1, .LBB49_4 ; RV64-NO-ATOMIC-NEXT: .LBB49_2: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64-NO-ATOMIC-NEXT: mv a2, a1 -; RV64-NO-ATOMIC-NEXT: bgtz a1, .LBB49_1 +; RV64-NO-ATOMIC-NEXT: mv a2, a0 +; RV64-NO-ATOMIC-NEXT: bgtz a0, .LBB49_1 ; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB49_2 Depth=1 ; RV64-NO-ATOMIC-NEXT: li a2, 1 ; RV64-NO-ATOMIC-NEXT: j .LBB49_1 ; RV64-NO-ATOMIC-NEXT: .LBB49_4: # %atomicrmw.end -; RV64-NO-ATOMIC-NEXT: mv a0, a1 ; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32 @@ -3453,42 +3453,42 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind { ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a1, 4(a0) +; RV32-NEXT: lw a0, 0(a0) +; RV32-NEXT: lw a1, 4(s0) ; RV32-NEXT: j .LBB50_2 ; RV32-NEXT: .LBB50_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV32-NEXT: neg a3, a0 +; RV32-NEXT: neg a3, a3 ; RV32-NEXT: and a3, 
a3, a1 -; RV32-NEXT: sw a4, 0(sp) +; RV32-NEXT: sw a0, 0(sp) ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: mv a1, sp ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 0(sp) +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: lw a0, 0(sp) ; RV32-NEXT: lw a1, 4(sp) -; RV32-NEXT: bnez a0, .LBB50_6 +; RV32-NEXT: bnez a2, .LBB50_6 ; RV32-NEXT: .LBB50_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: beqz a1, .LBB50_4 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV32-NEXT: slti a0, a1, 0 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: bnez a0, .LBB50_1 +; RV32-NEXT: slti a3, a1, 0 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bnez a3, .LBB50_1 ; RV32-NEXT: j .LBB50_5 ; RV32-NEXT: .LBB50_4: # in Loop: Header=BB50_2 Depth=1 -; RV32-NEXT: sltiu a0, a4, 2 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: bnez a0, .LBB50_1 +; RV32-NEXT: sltiu a3, a0, 2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bnez a3, .LBB50_1 ; RV32-NEXT: .LBB50_5: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB50_2 Depth=1 ; RV32-NEXT: li a2, 1 ; RV32-NEXT: j .LBB50_1 ; RV32-NEXT: .LBB50_6: # %atomicrmw.end -; RV32-NEXT: mv a0, a4 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 @@ -3501,29 +3501,29 @@ define i64 @rmw64_min_seq_cst(ptr %p) nounwind { ; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: mv s0, a0 -; RV64-NO-ATOMIC-NEXT: ld a1, 0(a0) +; RV64-NO-ATOMIC-NEXT: ld a0, 0(a0) ; RV64-NO-ATOMIC-NEXT: li s1, 2 ; RV64-NO-ATOMIC-NEXT: j .LBB50_2 ; RV64-NO-ATOMIC-NEXT: .LBB50_1: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB50_2 Depth=1 -; RV64-NO-ATOMIC-NEXT: sd a1, 0(sp) +; RV64-NO-ATOMIC-NEXT: sd a0, 0(sp) ; RV64-NO-ATOMIC-NEXT: mv a1, sp ; RV64-NO-ATOMIC-NEXT: li a3, 5 ; RV64-NO-ATOMIC-NEXT: li a4, 5 ; RV64-NO-ATOMIC-NEXT: mv a0, s0 ; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8 -; RV64-NO-ATOMIC-NEXT: ld a1, 0(sp) -; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB50_4 +; RV64-NO-ATOMIC-NEXT: mv a1, a0 +; RV64-NO-ATOMIC-NEXT: ld a0, 0(sp) +; RV64-NO-ATOMIC-NEXT: bnez a1, .LBB50_4 ; RV64-NO-ATOMIC-NEXT: .LBB50_2: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64-NO-ATOMIC-NEXT: mv a2, a1 -; RV64-NO-ATOMIC-NEXT: blt a1, s1, .LBB50_1 +; RV64-NO-ATOMIC-NEXT: mv a2, a0 +; RV64-NO-ATOMIC-NEXT: blt a0, s1, .LBB50_1 ; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB50_2 Depth=1 ; RV64-NO-ATOMIC-NEXT: li a2, 1 ; RV64-NO-ATOMIC-NEXT: j .LBB50_1 ; RV64-NO-ATOMIC-NEXT: .LBB50_4: # %atomicrmw.end -; RV64-NO-ATOMIC-NEXT: mv a0, a1 ; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -3560,37 +3560,37 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind { ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a1, 4(a0) +; RV32-NEXT: lw a0, 0(a0) +; RV32-NEXT: lw a1, 4(s0) ; RV32-NEXT: j .LBB51_2 ; RV32-NEXT: .LBB51_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1 -; RV32-NEXT: neg a3, a0 +; RV32-NEXT: neg a3, a3 ; RV32-NEXT: and a3, a3, a1 -; RV32-NEXT: sw a4, 0(sp) +; RV32-NEXT: sw a0, 0(sp) ; RV32-NEXT: sw a1, 4(sp) ; 
RV32-NEXT: mv a1, sp ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 0(sp) +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: lw a0, 0(sp) ; RV32-NEXT: lw a1, 4(sp) -; RV32-NEXT: bnez a0, .LBB51_4 +; RV32-NEXT: bnez a2, .LBB51_4 ; RV32-NEXT: .LBB51_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: snez a0, a1 -; RV32-NEXT: sltiu a2, a4, 2 -; RV32-NEXT: xori a2, a2, 1 -; RV32-NEXT: or a0, a2, a0 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: bnez a0, .LBB51_1 +; RV32-NEXT: snez a2, a1 +; RV32-NEXT: sltiu a3, a0, 2 +; RV32-NEXT: xori a3, a3, 1 +; RV32-NEXT: or a3, a3, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bnez a3, .LBB51_1 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB51_2 Depth=1 ; RV32-NEXT: li a2, 1 ; RV32-NEXT: j .LBB51_1 ; RV32-NEXT: .LBB51_4: # %atomicrmw.end -; RV32-NEXT: mv a0, a4 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 @@ -3602,21 +3602,21 @@ define i64 @rmw64_umax_seq_cst(ptr %p) nounwind { ; RV64-NO-ATOMIC-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: mv s0, a0 -; RV64-NO-ATOMIC-NEXT: ld a1, 0(a0) +; RV64-NO-ATOMIC-NEXT: ld a0, 0(a0) ; RV64-NO-ATOMIC-NEXT: .LBB51_1: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64-NO-ATOMIC-NEXT: seqz a2, a1 -; RV64-NO-ATOMIC-NEXT: add a2, a1, a2 -; RV64-NO-ATOMIC-NEXT: sd a1, 8(sp) +; RV64-NO-ATOMIC-NEXT: seqz a2, a0 +; RV64-NO-ATOMIC-NEXT: add a2, a0, a2 +; RV64-NO-ATOMIC-NEXT: sd a0, 8(sp) ; RV64-NO-ATOMIC-NEXT: addi a1, sp, 8 ; RV64-NO-ATOMIC-NEXT: li a3, 5 ; RV64-NO-ATOMIC-NEXT: li a4, 5 ; RV64-NO-ATOMIC-NEXT: mv a0, s0 ; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8 -; RV64-NO-ATOMIC-NEXT: ld a1, 8(sp) -; RV64-NO-ATOMIC-NEXT: beqz a0, .LBB51_1 +; RV64-NO-ATOMIC-NEXT: mv a1, a0 +; RV64-NO-ATOMIC-NEXT: ld a0, 8(sp) +; RV64-NO-ATOMIC-NEXT: beqz a1, .LBB51_1 ; RV64-NO-ATOMIC-NEXT: # %bb.2: # %atomicrmw.end -; RV64-NO-ATOMIC-NEXT: mv a0, a1 ; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: addi sp, sp, 32 @@ -3652,36 +3652,36 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind { ; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a1, 4(a0) +; RV32-NEXT: lw a0, 0(a0) +; RV32-NEXT: lw a1, 4(s0) ; RV32-NEXT: j .LBB52_2 ; RV32-NEXT: .LBB52_1: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV32-NEXT: neg a3, a0 +; RV32-NEXT: neg a3, a3 ; RV32-NEXT: and a3, a3, a1 -; RV32-NEXT: sw a4, 0(sp) +; RV32-NEXT: sw a0, 0(sp) ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: mv a1, sp ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 ; RV32-NEXT: mv a0, s0 ; RV32-NEXT: call __atomic_compare_exchange_8 -; RV32-NEXT: lw a4, 0(sp) +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: lw a0, 0(sp) ; RV32-NEXT: lw a1, 4(sp) -; RV32-NEXT: bnez a0, .LBB52_4 +; RV32-NEXT: bnez a2, .LBB52_4 ; RV32-NEXT: .LBB52_2: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32-NEXT: sltiu a0, a4, 2 -; RV32-NEXT: seqz a2, a1 -; RV32-NEXT: and a0, a2, a0 -; RV32-NEXT: mv a2, a4 -; RV32-NEXT: bnez a0, .LBB52_1 +; RV32-NEXT: sltiu a2, a0, 2 +; RV32-NEXT: seqz a3, a1 +; RV32-NEXT: and a3, a3, a2 +; RV32-NEXT: mv a2, a0 +; RV32-NEXT: bnez a3, 
.LBB52_1 ; RV32-NEXT: # %bb.3: # %atomicrmw.start ; RV32-NEXT: # in Loop: Header=BB52_2 Depth=1 ; RV32-NEXT: li a2, 1 ; RV32-NEXT: j .LBB52_1 ; RV32-NEXT: .LBB52_4: # %atomicrmw.end -; RV32-NEXT: mv a0, a4 ; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 16 @@ -3694,29 +3694,29 @@ define i64 @rmw64_umin_seq_cst(ptr %p) nounwind { ; RV64-NO-ATOMIC-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: sd s1, 8(sp) # 8-byte Folded Spill ; RV64-NO-ATOMIC-NEXT: mv s0, a0 -; RV64-NO-ATOMIC-NEXT: ld a1, 0(a0) +; RV64-NO-ATOMIC-NEXT: ld a0, 0(a0) ; RV64-NO-ATOMIC-NEXT: li s1, 2 ; RV64-NO-ATOMIC-NEXT: j .LBB52_2 ; RV64-NO-ATOMIC-NEXT: .LBB52_1: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB52_2 Depth=1 -; RV64-NO-ATOMIC-NEXT: sd a1, 0(sp) +; RV64-NO-ATOMIC-NEXT: sd a0, 0(sp) ; RV64-NO-ATOMIC-NEXT: mv a1, sp ; RV64-NO-ATOMIC-NEXT: li a3, 5 ; RV64-NO-ATOMIC-NEXT: li a4, 5 ; RV64-NO-ATOMIC-NEXT: mv a0, s0 ; RV64-NO-ATOMIC-NEXT: call __atomic_compare_exchange_8 -; RV64-NO-ATOMIC-NEXT: ld a1, 0(sp) -; RV64-NO-ATOMIC-NEXT: bnez a0, .LBB52_4 +; RV64-NO-ATOMIC-NEXT: mv a1, a0 +; RV64-NO-ATOMIC-NEXT: ld a0, 0(sp) +; RV64-NO-ATOMIC-NEXT: bnez a1, .LBB52_4 ; RV64-NO-ATOMIC-NEXT: .LBB52_2: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64-NO-ATOMIC-NEXT: mv a2, a1 -; RV64-NO-ATOMIC-NEXT: bltu a1, s1, .LBB52_1 +; RV64-NO-ATOMIC-NEXT: mv a2, a0 +; RV64-NO-ATOMIC-NEXT: bltu a0, s1, .LBB52_1 ; RV64-NO-ATOMIC-NEXT: # %bb.3: # %atomicrmw.start ; RV64-NO-ATOMIC-NEXT: # in Loop: Header=BB52_2 Depth=1 ; RV64-NO-ATOMIC-NEXT: li a2, 1 ; RV64-NO-ATOMIC-NEXT: j .LBB52_1 ; RV64-NO-ATOMIC-NEXT: .LBB52_4: # %atomicrmw.end -; RV64-NO-ATOMIC-NEXT: mv a0, a1 ; RV64-NO-ATOMIC-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64-NO-ATOMIC-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -4530,12 +4530,12 @@ define i128 @rmw128(ptr %p) nounwind { ; RV32-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s0, 40(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s1, 36(sp) # 4-byte Folded Spill -; RV32-NEXT: mv s0, a1 +; RV32-NEXT: mv s1, a1 +; RV32-NEXT: mv s0, a0 ; RV32-NEXT: lw a4, 0(a1) ; RV32-NEXT: lw a3, 4(a1) ; RV32-NEXT: lw a1, 8(a1) -; RV32-NEXT: lw a2, 12(s0) -; RV32-NEXT: mv s1, a0 +; RV32-NEXT: lw a2, 12(s1) ; RV32-NEXT: .LBB62_1: # %atomicrmw.start ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: addi a0, a4, 1 @@ -4559,7 +4559,7 @@ define i128 @rmw128(ptr %p) nounwind { ; RV32-NEXT: mv a3, sp ; RV32-NEXT: li a4, 5 ; RV32-NEXT: li a5, 5 -; RV32-NEXT: mv a1, s0 +; RV32-NEXT: mv a1, s1 ; RV32-NEXT: call __atomic_compare_exchange ; RV32-NEXT: lw a4, 16(sp) ; RV32-NEXT: lw a3, 20(sp) @@ -4567,10 +4567,10 @@ define i128 @rmw128(ptr %p) nounwind { ; RV32-NEXT: lw a2, 28(sp) ; RV32-NEXT: beqz a0, .LBB62_1 ; RV32-NEXT: # %bb.2: # %atomicrmw.end -; RV32-NEXT: sw a4, 0(s1) -; RV32-NEXT: sw a3, 4(s1) -; RV32-NEXT: sw a1, 8(s1) -; RV32-NEXT: sw a2, 12(s1) +; RV32-NEXT: sw a4, 0(s0) +; RV32-NEXT: sw a3, 4(s0) +; RV32-NEXT: sw a1, 8(s0) +; RV32-NEXT: sw a2, 12(s0) ; RV32-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 36(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll b/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll index e9b771a0698de..7da9bbbb079e9 100644 --- a/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll +++ 
b/llvm/test/CodeGen/RISCV/fp-fcanonicalize.ll @@ -306,12 +306,12 @@ define <4 x half> @fcanonicalize_v4f16(<4 x half> %x) { ; CHECK-NOFP16-RV64-NEXT: .cfi_offset fs0, -48 ; CHECK-NOFP16-RV64-NEXT: .cfi_offset fs1, -56 ; CHECK-NOFP16-RV64-NEXT: .cfi_offset fs2, -64 +; CHECK-NOFP16-RV64-NEXT: mv s0, a0 ; CHECK-NOFP16-RV64-NEXT: lhu s1, 0(a1) ; CHECK-NOFP16-RV64-NEXT: lhu s2, 8(a1) ; CHECK-NOFP16-RV64-NEXT: lhu s3, 16(a1) -; CHECK-NOFP16-RV64-NEXT: lhu a1, 24(a1) -; CHECK-NOFP16-RV64-NEXT: mv s0, a0 -; CHECK-NOFP16-RV64-NEXT: fmv.w.x fa0, a1 +; CHECK-NOFP16-RV64-NEXT: lhu a0, 24(a1) +; CHECK-NOFP16-RV64-NEXT: fmv.w.x fa0, a0 ; CHECK-NOFP16-RV64-NEXT: call __extendhfsf2 ; CHECK-NOFP16-RV64-NEXT: fmv.s fs0, fa0 ; CHECK-NOFP16-RV64-NEXT: fmv.w.x fa0, s3 @@ -330,8 +330,8 @@ define <4 x half> @fcanonicalize_v4f16(<4 x half> %x) { ; CHECK-NOFP16-RV64-NEXT: call __truncsfhf2 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s1, fa0 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s2, fs2 -; CHECK-NOFP16-RV64-NEXT: fmin.s fa0, fs0, fs0 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s3, fs1 +; CHECK-NOFP16-RV64-NEXT: fmin.s fa0, fs0, fs0 ; CHECK-NOFP16-RV64-NEXT: call __truncsfhf2 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w a0, fa0 ; CHECK-NOFP16-RV64-NEXT: sh s1, 0(s0) @@ -419,8 +419,8 @@ define <4 x half> @fcanonicalize_v4f16(<4 x half> %x) { ; CHECK-NOFP16-RV32-NEXT: call __truncsfhf2 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s1, fa0 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s2, fs1 -; CHECK-NOFP16-RV32-NEXT: fmin.s fa0, fs3, fs3 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s3, fs2 +; CHECK-NOFP16-RV32-NEXT: fmin.s fa0, fs3, fs3 ; CHECK-NOFP16-RV32-NEXT: call __truncsfhf2 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w a0, fa0 ; CHECK-NOFP16-RV32-NEXT: sh s1, 0(s0) @@ -485,12 +485,12 @@ define <4 x half> @fcanonicalize_v4f16_nnan(<4 x half> %x) { ; CHECK-NOFP16-RV64-NEXT: .cfi_offset fs0, -48 ; CHECK-NOFP16-RV64-NEXT: .cfi_offset fs1, -56 ; CHECK-NOFP16-RV64-NEXT: .cfi_offset fs2, -64 +; CHECK-NOFP16-RV64-NEXT: mv s0, a0 ; CHECK-NOFP16-RV64-NEXT: lhu s1, 0(a1) ; CHECK-NOFP16-RV64-NEXT: lhu s2, 8(a1) ; CHECK-NOFP16-RV64-NEXT: lhu s3, 16(a1) -; CHECK-NOFP16-RV64-NEXT: lhu a1, 24(a1) -; CHECK-NOFP16-RV64-NEXT: mv s0, a0 -; CHECK-NOFP16-RV64-NEXT: fmv.w.x fa0, a1 +; CHECK-NOFP16-RV64-NEXT: lhu a0, 24(a1) +; CHECK-NOFP16-RV64-NEXT: fmv.w.x fa0, a0 ; CHECK-NOFP16-RV64-NEXT: call __extendhfsf2 ; CHECK-NOFP16-RV64-NEXT: fmv.s fs0, fa0 ; CHECK-NOFP16-RV64-NEXT: fmv.w.x fa0, s3 @@ -509,8 +509,8 @@ define <4 x half> @fcanonicalize_v4f16_nnan(<4 x half> %x) { ; CHECK-NOFP16-RV64-NEXT: call __truncsfhf2 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s1, fa0 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s2, fs2 -; CHECK-NOFP16-RV64-NEXT: fmin.s fa0, fs0, fs0 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s3, fs1 +; CHECK-NOFP16-RV64-NEXT: fmin.s fa0, fs0, fs0 ; CHECK-NOFP16-RV64-NEXT: call __truncsfhf2 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w a0, fa0 ; CHECK-NOFP16-RV64-NEXT: sh s1, 0(s0) @@ -598,8 +598,8 @@ define <4 x half> @fcanonicalize_v4f16_nnan(<4 x half> %x) { ; CHECK-NOFP16-RV32-NEXT: call __truncsfhf2 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s1, fa0 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s2, fs1 -; CHECK-NOFP16-RV32-NEXT: fmin.s fa0, fs3, fs3 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s3, fs2 +; CHECK-NOFP16-RV32-NEXT: fmin.s fa0, fs3, fs3 ; CHECK-NOFP16-RV32-NEXT: call __truncsfhf2 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w a0, fa0 ; CHECK-NOFP16-RV32-NEXT: sh s1, 0(s0) @@ -688,6 +688,7 @@ define <8 x half> @fcanonicalize_v8f16(<8 x half> %x) { ; CHECK-NOFP16-RV64-NEXT: .cfi_offset fs4, -112 ; CHECK-NOFP16-RV64-NEXT: .cfi_offset fs5, -120 ; CHECK-NOFP16-RV64-NEXT: 
.cfi_offset fs6, -128 +; CHECK-NOFP16-RV64-NEXT: mv s0, a0 ; CHECK-NOFP16-RV64-NEXT: lhu s1, 0(a1) ; CHECK-NOFP16-RV64-NEXT: lhu s2, 8(a1) ; CHECK-NOFP16-RV64-NEXT: lhu s3, 16(a1) @@ -695,9 +696,8 @@ define <8 x half> @fcanonicalize_v8f16(<8 x half> %x) { ; CHECK-NOFP16-RV64-NEXT: lhu s5, 32(a1) ; CHECK-NOFP16-RV64-NEXT: lhu s6, 40(a1) ; CHECK-NOFP16-RV64-NEXT: lhu s7, 48(a1) -; CHECK-NOFP16-RV64-NEXT: lhu a1, 56(a1) -; CHECK-NOFP16-RV64-NEXT: mv s0, a0 -; CHECK-NOFP16-RV64-NEXT: fmv.w.x fa0, a1 +; CHECK-NOFP16-RV64-NEXT: lhu a0, 56(a1) +; CHECK-NOFP16-RV64-NEXT: fmv.w.x fa0, a0 ; CHECK-NOFP16-RV64-NEXT: call __extendhfsf2 ; CHECK-NOFP16-RV64-NEXT: fmv.s fs0, fa0 ; CHECK-NOFP16-RV64-NEXT: fmv.w.x fa0, s7 @@ -740,8 +740,8 @@ define <8 x half> @fcanonicalize_v8f16(<8 x half> %x) { ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s4, fs4 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s5, fs3 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s6, fs2 -; CHECK-NOFP16-RV64-NEXT: fmin.s fa0, fs0, fs0 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s7, fs1 +; CHECK-NOFP16-RV64-NEXT: fmin.s fa0, fs0, fs0 ; CHECK-NOFP16-RV64-NEXT: call __truncsfhf2 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w a0, fa0 ; CHECK-NOFP16-RV64-NEXT: sh s5, 8(s0) @@ -905,8 +905,8 @@ define <8 x half> @fcanonicalize_v8f16(<8 x half> %x) { ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s4, fs5 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s5, fs6 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s6, fs7 -; CHECK-NOFP16-RV32-NEXT: fmin.s fa0, fs1, fs1 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s7, fs3 +; CHECK-NOFP16-RV32-NEXT: fmin.s fa0, fs1, fs1 ; CHECK-NOFP16-RV32-NEXT: call __truncsfhf2 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w a0, fa0 ; CHECK-NOFP16-RV32-NEXT: sh s5, 8(s0) @@ -1015,6 +1015,7 @@ define <8 x half> @fcanonicalize_v8f16_nnan(<8 x half> %x) { ; CHECK-NOFP16-RV64-NEXT: .cfi_offset fs4, -112 ; CHECK-NOFP16-RV64-NEXT: .cfi_offset fs5, -120 ; CHECK-NOFP16-RV64-NEXT: .cfi_offset fs6, -128 +; CHECK-NOFP16-RV64-NEXT: mv s0, a0 ; CHECK-NOFP16-RV64-NEXT: lhu s1, 0(a1) ; CHECK-NOFP16-RV64-NEXT: lhu s2, 8(a1) ; CHECK-NOFP16-RV64-NEXT: lhu s3, 16(a1) @@ -1022,9 +1023,8 @@ define <8 x half> @fcanonicalize_v8f16_nnan(<8 x half> %x) { ; CHECK-NOFP16-RV64-NEXT: lhu s5, 32(a1) ; CHECK-NOFP16-RV64-NEXT: lhu s6, 40(a1) ; CHECK-NOFP16-RV64-NEXT: lhu s7, 48(a1) -; CHECK-NOFP16-RV64-NEXT: lhu a1, 56(a1) -; CHECK-NOFP16-RV64-NEXT: mv s0, a0 -; CHECK-NOFP16-RV64-NEXT: fmv.w.x fa0, a1 +; CHECK-NOFP16-RV64-NEXT: lhu a0, 56(a1) +; CHECK-NOFP16-RV64-NEXT: fmv.w.x fa0, a0 ; CHECK-NOFP16-RV64-NEXT: call __extendhfsf2 ; CHECK-NOFP16-RV64-NEXT: fmv.s fs0, fa0 ; CHECK-NOFP16-RV64-NEXT: fmv.w.x fa0, s7 @@ -1067,8 +1067,8 @@ define <8 x half> @fcanonicalize_v8f16_nnan(<8 x half> %x) { ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s4, fs4 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s5, fs3 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s6, fs2 -; CHECK-NOFP16-RV64-NEXT: fmin.s fa0, fs0, fs0 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w s7, fs1 +; CHECK-NOFP16-RV64-NEXT: fmin.s fa0, fs0, fs0 ; CHECK-NOFP16-RV64-NEXT: call __truncsfhf2 ; CHECK-NOFP16-RV64-NEXT: fmv.x.w a0, fa0 ; CHECK-NOFP16-RV64-NEXT: sh s5, 8(s0) @@ -1232,8 +1232,8 @@ define <8 x half> @fcanonicalize_v8f16_nnan(<8 x half> %x) { ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s4, fs5 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s5, fs6 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s6, fs7 -; CHECK-NOFP16-RV32-NEXT: fmin.s fa0, fs1, fs1 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w s7, fs3 +; CHECK-NOFP16-RV32-NEXT: fmin.s fa0, fs1, fs1 ; CHECK-NOFP16-RV32-NEXT: call __truncsfhf2 ; CHECK-NOFP16-RV32-NEXT: fmv.x.w a0, fa0 ; CHECK-NOFP16-RV32-NEXT: sh s5, 8(s0) diff --git 
a/llvm/test/CodeGen/RISCV/fp128.ll b/llvm/test/CodeGen/RISCV/fp128.ll index a8e26f7686e50..443bd22c58a21 100644 --- a/llvm/test/CodeGen/RISCV/fp128.ll +++ b/llvm/test/CodeGen/RISCV/fp128.ll @@ -18,21 +18,21 @@ define i32 @test_load_and_cmp() nounwind { ; RV32I-NEXT: lw a2, %lo(x)(a0) ; RV32I-NEXT: lw a3, %lo(x+4)(a0) ; RV32I-NEXT: lw a4, %lo(x+8)(a0) -; RV32I-NEXT: lw a5, %lo(x+12)(a0) -; RV32I-NEXT: lw a0, %lo(y)(a1) +; RV32I-NEXT: lw a0, %lo(x+12)(a0) +; RV32I-NEXT: lw a5, %lo(y)(a1) ; RV32I-NEXT: lw a6, %lo(y+4)(a1) ; RV32I-NEXT: lw a7, %lo(y+8)(a1) ; RV32I-NEXT: lw a1, %lo(y+12)(a1) -; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a5, 8(sp) ; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: sw a7, 16(sp) ; RV32I-NEXT: sw a1, 20(sp) -; RV32I-NEXT: addi a0, sp, 24 -; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: sw a2, 24(sp) ; RV32I-NEXT: sw a3, 28(sp) ; RV32I-NEXT: sw a4, 32(sp) -; RV32I-NEXT: sw a5, 36(sp) +; RV32I-NEXT: sw a0, 36(sp) +; RV32I-NEXT: addi a0, sp, 24 +; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: call __netf2 ; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload @@ -52,35 +52,35 @@ define i32 @test_add_and_fptosi() nounwind { ; RV32I-NEXT: sw ra, 76(sp) # 4-byte Folded Spill ; RV32I-NEXT: lui a0, %hi(x) ; RV32I-NEXT: lui a1, %hi(y) -; RV32I-NEXT: lw a3, %lo(x)(a0) -; RV32I-NEXT: lw a4, %lo(x+4)(a0) -; RV32I-NEXT: lw a5, %lo(x+8)(a0) -; RV32I-NEXT: lw a6, %lo(x+12)(a0) -; RV32I-NEXT: lw a0, %lo(y)(a1) -; RV32I-NEXT: lw a2, %lo(y+4)(a1) +; RV32I-NEXT: lw a2, %lo(x)(a0) +; RV32I-NEXT: lw a3, %lo(x+4)(a0) +; RV32I-NEXT: lw a4, %lo(x+8)(a0) +; RV32I-NEXT: lw a0, %lo(x+12)(a0) +; RV32I-NEXT: lw a5, %lo(y)(a1) +; RV32I-NEXT: lw a6, %lo(y+4)(a1) ; RV32I-NEXT: lw a7, %lo(y+8)(a1) ; RV32I-NEXT: lw a1, %lo(y+12)(a1) -; RV32I-NEXT: sw a0, 24(sp) -; RV32I-NEXT: sw a2, 28(sp) +; RV32I-NEXT: sw a5, 24(sp) +; RV32I-NEXT: sw a6, 28(sp) ; RV32I-NEXT: sw a7, 32(sp) ; RV32I-NEXT: sw a1, 36(sp) +; RV32I-NEXT: sw a2, 40(sp) +; RV32I-NEXT: sw a3, 44(sp) +; RV32I-NEXT: sw a4, 48(sp) +; RV32I-NEXT: sw a0, 52(sp) ; RV32I-NEXT: addi a0, sp, 56 ; RV32I-NEXT: addi a1, sp, 40 ; RV32I-NEXT: addi a2, sp, 24 -; RV32I-NEXT: sw a3, 40(sp) -; RV32I-NEXT: sw a4, 44(sp) -; RV32I-NEXT: sw a5, 48(sp) -; RV32I-NEXT: sw a6, 52(sp) ; RV32I-NEXT: call __addtf3 -; RV32I-NEXT: lw a1, 56(sp) -; RV32I-NEXT: lw a2, 60(sp) -; RV32I-NEXT: lw a3, 64(sp) -; RV32I-NEXT: lw a4, 68(sp) +; RV32I-NEXT: lw a0, 56(sp) +; RV32I-NEXT: lw a1, 60(sp) +; RV32I-NEXT: lw a2, 64(sp) +; RV32I-NEXT: lw a3, 68(sp) +; RV32I-NEXT: sw a0, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) +; RV32I-NEXT: sw a2, 16(sp) +; RV32I-NEXT: sw a3, 20(sp) ; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: sw a1, 8(sp) -; RV32I-NEXT: sw a2, 12(sp) -; RV32I-NEXT: sw a3, 16(sp) -; RV32I-NEXT: sw a4, 20(sp) ; RV32I-NEXT: call __fixtfsi ; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 80 @@ -101,26 +101,26 @@ define fp128 @fmaximum(fp128 %x, fp128 %y) { ; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; RV32I-NEXT: .cfi_offset ra, -4 ; RV32I-NEXT: .cfi_offset s0, -8 -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) -; RV32I-NEXT: lw a1, 0(a2) -; RV32I-NEXT: lw a7, 4(a2) -; RV32I-NEXT: lw t0, 8(a2) -; RV32I-NEXT: lw a2, 12(a2) ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: sw a1, 8(sp) -; RV32I-NEXT: sw a7, 12(sp) -; RV32I-NEXT: sw t0, 16(sp) +; RV32I-NEXT: lw a0, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: lw a5, 0(a2) +; 
RV32I-NEXT: lw a6, 4(a2) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) +; RV32I-NEXT: sw a7, 16(sp) ; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw a3, 28(sp) +; RV32I-NEXT: sw a4, 32(sp) +; RV32I-NEXT: sw a1, 36(sp) ; RV32I-NEXT: addi a0, sp, 40 ; RV32I-NEXT: addi a1, sp, 24 ; RV32I-NEXT: addi a2, sp, 8 -; RV32I-NEXT: sw a3, 24(sp) -; RV32I-NEXT: sw a4, 28(sp) -; RV32I-NEXT: sw a5, 32(sp) -; RV32I-NEXT: sw a6, 36(sp) ; RV32I-NEXT: call fmaximuml ; RV32I-NEXT: lw a0, 40(sp) ; RV32I-NEXT: lw a1, 44(sp) @@ -150,26 +150,26 @@ define fp128 @fminimum(fp128 %x, fp128 %y) { ; RV32I-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; RV32I-NEXT: .cfi_offset ra, -4 ; RV32I-NEXT: .cfi_offset s0, -8 -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) -; RV32I-NEXT: lw a1, 0(a2) -; RV32I-NEXT: lw a7, 4(a2) -; RV32I-NEXT: lw t0, 8(a2) -; RV32I-NEXT: lw a2, 12(a2) ; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: sw a1, 8(sp) -; RV32I-NEXT: sw a7, 12(sp) -; RV32I-NEXT: sw t0, 16(sp) +; RV32I-NEXT: lw a0, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw a6, 4(a2) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw a2, 12(a2) +; RV32I-NEXT: sw a5, 8(sp) +; RV32I-NEXT: sw a6, 12(sp) +; RV32I-NEXT: sw a7, 16(sp) ; RV32I-NEXT: sw a2, 20(sp) +; RV32I-NEXT: sw a0, 24(sp) +; RV32I-NEXT: sw a3, 28(sp) +; RV32I-NEXT: sw a4, 32(sp) +; RV32I-NEXT: sw a1, 36(sp) ; RV32I-NEXT: addi a0, sp, 40 ; RV32I-NEXT: addi a1, sp, 24 ; RV32I-NEXT: addi a2, sp, 8 -; RV32I-NEXT: sw a3, 24(sp) -; RV32I-NEXT: sw a4, 28(sp) -; RV32I-NEXT: sw a5, 32(sp) -; RV32I-NEXT: sw a6, 36(sp) ; RV32I-NEXT: call fminimuml ; RV32I-NEXT: lw a0, 40(sp) ; RV32I-NEXT: lw a1, 44(sp) diff --git a/llvm/test/CodeGen/RISCV/fpclamptosat.ll b/llvm/test/CodeGen/RISCV/fpclamptosat.ll index c5c3b199447a9..2c1503cc162ea 100644 --- a/llvm/test/CodeGen/RISCV/fpclamptosat.ll +++ b/llvm/test/CodeGen/RISCV/fpclamptosat.ll @@ -192,8 +192,8 @@ define i32 @ustest_f64i32(double %x) { ; RV32IF-NEXT: .LBB2_3: # %entry ; RV32IF-NEXT: addi a3, a2, -1 ; RV32IF-NEXT: neg a2, a2 -; RV32IF-NEXT: and a1, a2, a1 ; RV32IF-NEXT: or a0, a3, a0 +; RV32IF-NEXT: and a1, a2, a1 ; RV32IF-NEXT: beqz a1, .LBB2_5 ; RV32IF-NEXT: # %bb.4: # %entry ; RV32IF-NEXT: sgtz a1, a1 @@ -501,8 +501,8 @@ define i32 @ustest_f16i32(half %x) { ; RV32-NEXT: .LBB8_3: # %entry ; RV32-NEXT: addi a3, a2, -1 ; RV32-NEXT: neg a2, a2 -; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: or a0, a3, a0 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: beqz a1, .LBB8_5 ; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: sgtz a1, a1 @@ -1277,20 +1277,20 @@ define i64 @utest_f64i64(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixunsdfti -; RV32IF-NEXT: lw a0, 16(sp) -; RV32IF-NEXT: lw a1, 20(sp) -; RV32IF-NEXT: lw a2, 12(sp) -; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: or a4, a1, a0 -; RV32IF-NEXT: xori a0, a0, 1 +; RV32IF-NEXT: lw a0, 8(sp) +; RV32IF-NEXT: lw a1, 12(sp) +; RV32IF-NEXT: lw a2, 16(sp) +; RV32IF-NEXT: lw a3, 20(sp) +; RV32IF-NEXT: or a4, a3, a2 +; RV32IF-NEXT: xori a2, a2, 1 ; RV32IF-NEXT: seqz a4, a4 -; RV32IF-NEXT: or a0, a0, a1 -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: addi a0, a0, -1 -; RV32IF-NEXT: and a0, a0, a4 -; RV32IF-NEXT: neg a1, a0 -; RV32IF-NEXT: and a0, a1, a3 -; RV32IF-NEXT: and a1, a1, a2 +; RV32IF-NEXT: or a2, a2, a3 +; RV32IF-NEXT: seqz a2, a2 +; 
RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a2, a2, a4 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: and a1, a2, a1 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: .cfi_restore ra ; RV32IF-NEXT: addi sp, sp, 32 @@ -1321,20 +1321,20 @@ define i64 @utest_f64i64(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixunsdfti -; RV32IFD-NEXT: lw a0, 16(sp) -; RV32IFD-NEXT: lw a1, 20(sp) -; RV32IFD-NEXT: lw a2, 12(sp) -; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: or a4, a1, a0 -; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: lw a2, 16(sp) +; RV32IFD-NEXT: lw a3, 20(sp) +; RV32IFD-NEXT: or a4, a3, a2 +; RV32IFD-NEXT: xori a2, a2, 1 ; RV32IFD-NEXT: seqz a4, a4 -; RV32IFD-NEXT: or a0, a0, a1 -; RV32IFD-NEXT: seqz a0, a0 -; RV32IFD-NEXT: addi a0, a0, -1 -; RV32IFD-NEXT: and a0, a0, a4 -; RV32IFD-NEXT: neg a1, a0 -; RV32IFD-NEXT: and a0, a1, a3 -; RV32IFD-NEXT: and a1, a1, a2 +; RV32IFD-NEXT: or a2, a2, a3 +; RV32IFD-NEXT: seqz a2, a2 +; RV32IFD-NEXT: addi a2, a2, -1 +; RV32IFD-NEXT: and a2, a2, a4 +; RV32IFD-NEXT: neg a2, a2 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: and a1, a2, a1 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: .cfi_restore ra ; RV32IFD-NEXT: addi sp, sp, 32 @@ -1359,8 +1359,8 @@ define i64 @ustest_f64i64(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti -; RV32IF-NEXT: lw a1, 20(sp) ; RV32IF-NEXT: lw a0, 16(sp) +; RV32IF-NEXT: lw a1, 20(sp) ; RV32IF-NEXT: beqz a1, .LBB20_2 ; RV32IF-NEXT: # %bb.1: # %entry ; RV32IF-NEXT: slti a2, a1, 0 @@ -1378,8 +1378,8 @@ define i64 @ustest_f64i64(double %x) { ; RV32IF-NEXT: # %bb.4: # %entry ; RV32IF-NEXT: li a0, 1 ; RV32IF-NEXT: .LBB20_5: # %entry -; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: lw a4, 12(sp) +; RV32IF-NEXT: lw a4, 8(sp) +; RV32IF-NEXT: lw a3, 12(sp) ; RV32IF-NEXT: and a5, a2, a1 ; RV32IF-NEXT: beqz a5, .LBB20_7 ; RV32IF-NEXT: # %bb.6: # %entry @@ -1388,17 +1388,17 @@ define i64 @ustest_f64i64(double %x) { ; RV32IF-NEXT: .LBB20_7: ; RV32IF-NEXT: snez a1, a0 ; RV32IF-NEXT: .LBB20_8: # %entry -; RV32IF-NEXT: and a4, a2, a4 +; RV32IF-NEXT: and a3, a2, a3 +; RV32IF-NEXT: and a2, a2, a4 ; RV32IF-NEXT: or a0, a0, a5 -; RV32IF-NEXT: and a2, a2, a3 ; RV32IF-NEXT: bnez a0, .LBB20_10 ; RV32IF-NEXT: # %bb.9: -; RV32IF-NEXT: or a0, a2, a4 +; RV32IF-NEXT: or a0, a2, a3 ; RV32IF-NEXT: snez a1, a0 ; RV32IF-NEXT: .LBB20_10: # %entry ; RV32IF-NEXT: neg a1, a1 ; RV32IF-NEXT: and a0, a1, a2 -; RV32IF-NEXT: and a1, a1, a4 +; RV32IF-NEXT: and a1, a1, a3 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: .cfi_restore ra ; RV32IF-NEXT: addi sp, sp, 32 @@ -1442,8 +1442,8 @@ define i64 @ustest_f64i64(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti -; RV32IFD-NEXT: lw a1, 20(sp) ; RV32IFD-NEXT: lw a0, 16(sp) +; RV32IFD-NEXT: lw a1, 20(sp) ; RV32IFD-NEXT: beqz a1, .LBB20_2 ; RV32IFD-NEXT: # %bb.1: # %entry ; RV32IFD-NEXT: slti a2, a1, 0 @@ -1461,8 +1461,8 @@ define i64 @ustest_f64i64(double %x) { ; RV32IFD-NEXT: # %bb.4: # %entry ; RV32IFD-NEXT: li a0, 1 ; RV32IFD-NEXT: .LBB20_5: # %entry -; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: lw a4, 12(sp) +; RV32IFD-NEXT: lw a4, 8(sp) +; RV32IFD-NEXT: lw a3, 12(sp) ; RV32IFD-NEXT: and a5, a2, a1 ; RV32IFD-NEXT: beqz a5, .LBB20_7 ; RV32IFD-NEXT: # %bb.6: # %entry @@ -1471,17 +1471,17 @@ define i64 
@ustest_f64i64(double %x) { ; RV32IFD-NEXT: .LBB20_7: ; RV32IFD-NEXT: snez a1, a0 ; RV32IFD-NEXT: .LBB20_8: # %entry -; RV32IFD-NEXT: and a4, a2, a4 +; RV32IFD-NEXT: and a3, a2, a3 +; RV32IFD-NEXT: and a2, a2, a4 ; RV32IFD-NEXT: or a0, a0, a5 -; RV32IFD-NEXT: and a2, a2, a3 ; RV32IFD-NEXT: bnez a0, .LBB20_10 ; RV32IFD-NEXT: # %bb.9: -; RV32IFD-NEXT: or a0, a2, a4 +; RV32IFD-NEXT: or a0, a2, a3 ; RV32IFD-NEXT: snez a1, a0 ; RV32IFD-NEXT: .LBB20_10: # %entry ; RV32IFD-NEXT: neg a1, a1 ; RV32IFD-NEXT: and a0, a1, a2 -; RV32IFD-NEXT: and a1, a1, a4 +; RV32IFD-NEXT: and a1, a1, a3 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: .cfi_restore ra ; RV32IFD-NEXT: addi sp, sp, 32 @@ -1587,20 +1587,20 @@ define i64 @utest_f32i64(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixunssfti -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lw a1, 20(sp) -; RV32-NEXT: lw a2, 12(sp) -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: xori a0, a0, 1 +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a3, 20(sp) +; RV32-NEXT: or a4, a3, a2 +; RV32-NEXT: xori a2, a2, 1 ; RV32-NEXT: seqz a4, a4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a4 -; RV32-NEXT: neg a1, a0 -; RV32-NEXT: and a0, a1, a3 -; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a2, a2, a3 +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 @@ -1639,8 +1639,8 @@ define i64 @ustest_f32i64(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: beqz a1, .LBB23_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: slti a2, a1, 0 @@ -1658,8 +1658,8 @@ define i64 @ustest_f32i64(float %x) { ; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB23_5: # %entry -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: lw a4, 12(sp) +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lw a3, 12(sp) ; RV32-NEXT: and a5, a2, a1 ; RV32-NEXT: beqz a5, .LBB23_7 ; RV32-NEXT: # %bb.6: # %entry @@ -1668,17 +1668,17 @@ define i64 @ustest_f32i64(float %x) { ; RV32-NEXT: .LBB23_7: ; RV32-NEXT: snez a1, a0 ; RV32-NEXT: .LBB23_8: # %entry -; RV32-NEXT: and a4, a2, a4 +; RV32-NEXT: and a3, a2, a3 +; RV32-NEXT: and a2, a2, a4 ; RV32-NEXT: or a0, a0, a5 -; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: bnez a0, .LBB23_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: or a0, a2, a4 +; RV32-NEXT: or a0, a2, a3 ; RV32-NEXT: snez a1, a0 ; RV32-NEXT: .LBB23_10: # %entry ; RV32-NEXT: neg a1, a1 ; RV32-NEXT: and a0, a1, a2 -; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: and a1, a1, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 @@ -1848,20 +1848,20 @@ define i64 @utesth_f16i64(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixunssfti -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lw a1, 20(sp) -; RV32-NEXT: lw a2, 12(sp) -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: xori a0, a0, 1 +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a3, 20(sp) +; RV32-NEXT: or a4, a3, a2 +; RV32-NEXT: xori a2, a2, 1 ; RV32-NEXT: 
seqz a4, a4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a4 -; RV32-NEXT: neg a1, a0 -; RV32-NEXT: and a0, a1, a3 -; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a2, a2, a3 +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 @@ -1902,8 +1902,8 @@ define i64 @ustest_f16i64(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: lw a0, 16(sp) +; RV32-NEXT: lw a1, 20(sp) ; RV32-NEXT: beqz a1, .LBB26_2 ; RV32-NEXT: # %bb.1: # %entry ; RV32-NEXT: slti a2, a1, 0 @@ -1921,8 +1921,8 @@ define i64 @ustest_f16i64(half %x) { ; RV32-NEXT: # %bb.4: # %entry ; RV32-NEXT: li a0, 1 ; RV32-NEXT: .LBB26_5: # %entry -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: lw a4, 12(sp) +; RV32-NEXT: lw a4, 8(sp) +; RV32-NEXT: lw a3, 12(sp) ; RV32-NEXT: and a5, a2, a1 ; RV32-NEXT: beqz a5, .LBB26_7 ; RV32-NEXT: # %bb.6: # %entry @@ -1931,17 +1931,17 @@ define i64 @ustest_f16i64(half %x) { ; RV32-NEXT: .LBB26_7: ; RV32-NEXT: snez a1, a0 ; RV32-NEXT: .LBB26_8: # %entry -; RV32-NEXT: and a4, a2, a4 +; RV32-NEXT: and a3, a2, a3 +; RV32-NEXT: and a2, a2, a4 ; RV32-NEXT: or a0, a0, a5 -; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: bnez a0, .LBB26_10 ; RV32-NEXT: # %bb.9: -; RV32-NEXT: or a0, a2, a4 +; RV32-NEXT: or a0, a2, a3 ; RV32-NEXT: snez a1, a0 ; RV32-NEXT: .LBB26_10: # %entry ; RV32-NEXT: neg a1, a1 ; RV32-NEXT: and a0, a1, a2 -; RV32-NEXT: and a1, a1, a4 +; RV32-NEXT: and a1, a1, a3 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 @@ -3211,20 +3211,20 @@ define i64 @utest_f64i64_mm(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixunsdfti -; RV32IF-NEXT: lw a0, 16(sp) -; RV32IF-NEXT: lw a1, 20(sp) -; RV32IF-NEXT: lw a2, 12(sp) -; RV32IF-NEXT: lw a3, 8(sp) -; RV32IF-NEXT: or a4, a1, a0 -; RV32IF-NEXT: xori a0, a0, 1 +; RV32IF-NEXT: lw a0, 8(sp) +; RV32IF-NEXT: lw a1, 12(sp) +; RV32IF-NEXT: lw a2, 16(sp) +; RV32IF-NEXT: lw a3, 20(sp) +; RV32IF-NEXT: or a4, a3, a2 +; RV32IF-NEXT: xori a2, a2, 1 ; RV32IF-NEXT: seqz a4, a4 -; RV32IF-NEXT: or a0, a0, a1 -; RV32IF-NEXT: seqz a0, a0 -; RV32IF-NEXT: addi a0, a0, -1 -; RV32IF-NEXT: and a0, a0, a4 -; RV32IF-NEXT: neg a1, a0 -; RV32IF-NEXT: and a0, a1, a3 -; RV32IF-NEXT: and a1, a1, a2 +; RV32IF-NEXT: or a2, a2, a3 +; RV32IF-NEXT: seqz a2, a2 +; RV32IF-NEXT: addi a2, a2, -1 +; RV32IF-NEXT: and a2, a2, a4 +; RV32IF-NEXT: neg a2, a2 +; RV32IF-NEXT: and a0, a2, a0 +; RV32IF-NEXT: and a1, a2, a1 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: .cfi_restore ra ; RV32IF-NEXT: addi sp, sp, 32 @@ -3255,20 +3255,20 @@ define i64 @utest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixunsdfti -; RV32IFD-NEXT: lw a0, 16(sp) -; RV32IFD-NEXT: lw a1, 20(sp) -; RV32IFD-NEXT: lw a2, 12(sp) -; RV32IFD-NEXT: lw a3, 8(sp) -; RV32IFD-NEXT: or a4, a1, a0 -; RV32IFD-NEXT: xori a0, a0, 1 +; RV32IFD-NEXT: lw a0, 8(sp) +; RV32IFD-NEXT: lw a1, 12(sp) +; RV32IFD-NEXT: lw a2, 16(sp) +; RV32IFD-NEXT: lw a3, 20(sp) +; RV32IFD-NEXT: or a4, a3, a2 +; RV32IFD-NEXT: xori a2, a2, 1 ; RV32IFD-NEXT: seqz a4, a4 -; RV32IFD-NEXT: or a0, a0, a1 -; RV32IFD-NEXT: seqz a0, a0 -; 
RV32IFD-NEXT: addi a0, a0, -1 -; RV32IFD-NEXT: and a0, a0, a4 -; RV32IFD-NEXT: neg a1, a0 -; RV32IFD-NEXT: and a0, a1, a3 -; RV32IFD-NEXT: and a1, a1, a2 +; RV32IFD-NEXT: or a2, a2, a3 +; RV32IFD-NEXT: seqz a2, a2 +; RV32IFD-NEXT: addi a2, a2, -1 +; RV32IFD-NEXT: and a2, a2, a4 +; RV32IFD-NEXT: neg a2, a2 +; RV32IFD-NEXT: and a0, a2, a0 +; RV32IFD-NEXT: and a1, a2, a1 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: .cfi_restore ra ; RV32IFD-NEXT: addi sp, sp, 32 @@ -3292,30 +3292,30 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IF-NEXT: mv a1, a0 ; RV32IF-NEXT: addi a0, sp, 8 ; RV32IF-NEXT: call __fixdfti -; RV32IF-NEXT: lw a0, 20(sp) -; RV32IF-NEXT: lw a1, 8(sp) +; RV32IF-NEXT: lw a0, 8(sp) ; RV32IF-NEXT: lw a2, 12(sp) ; RV32IF-NEXT: lw a3, 16(sp) -; RV32IF-NEXT: beqz a0, .LBB47_2 +; RV32IF-NEXT: lw a1, 20(sp) +; RV32IF-NEXT: beqz a1, .LBB47_2 ; RV32IF-NEXT: # %bb.1: # %entry -; RV32IF-NEXT: slti a4, a0, 0 +; RV32IF-NEXT: slti a4, a1, 0 ; RV32IF-NEXT: j .LBB47_3 ; RV32IF-NEXT: .LBB47_2: ; RV32IF-NEXT: seqz a4, a3 ; RV32IF-NEXT: .LBB47_3: # %entry ; RV32IF-NEXT: xori a3, a3, 1 -; RV32IF-NEXT: or a3, a3, a0 +; RV32IF-NEXT: or a3, a3, a1 ; RV32IF-NEXT: seqz a3, a3 ; RV32IF-NEXT: addi a3, a3, -1 ; RV32IF-NEXT: and a3, a3, a4 ; RV32IF-NEXT: neg a3, a3 ; RV32IF-NEXT: and a2, a3, a2 -; RV32IF-NEXT: and a1, a3, a1 ; RV32IF-NEXT: and a0, a3, a0 -; RV32IF-NEXT: slti a0, a0, 0 -; RV32IF-NEXT: addi a3, a0, -1 -; RV32IF-NEXT: and a0, a3, a1 -; RV32IF-NEXT: and a1, a3, a2 +; RV32IF-NEXT: and a1, a3, a1 +; RV32IF-NEXT: slti a1, a1, 0 +; RV32IF-NEXT: addi a1, a1, -1 +; RV32IF-NEXT: and a0, a1, a0 +; RV32IF-NEXT: and a1, a1, a2 ; RV32IF-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IF-NEXT: .cfi_restore ra ; RV32IF-NEXT: addi sp, sp, 32 @@ -3354,30 +3354,30 @@ define i64 @ustest_f64i64_mm(double %x) { ; RV32IFD-NEXT: .cfi_offset ra, -4 ; RV32IFD-NEXT: addi a0, sp, 8 ; RV32IFD-NEXT: call __fixdfti -; RV32IFD-NEXT: lw a0, 20(sp) -; RV32IFD-NEXT: lw a1, 8(sp) +; RV32IFD-NEXT: lw a0, 8(sp) ; RV32IFD-NEXT: lw a2, 12(sp) ; RV32IFD-NEXT: lw a3, 16(sp) -; RV32IFD-NEXT: beqz a0, .LBB47_2 +; RV32IFD-NEXT: lw a1, 20(sp) +; RV32IFD-NEXT: beqz a1, .LBB47_2 ; RV32IFD-NEXT: # %bb.1: # %entry -; RV32IFD-NEXT: slti a4, a0, 0 +; RV32IFD-NEXT: slti a4, a1, 0 ; RV32IFD-NEXT: j .LBB47_3 ; RV32IFD-NEXT: .LBB47_2: ; RV32IFD-NEXT: seqz a4, a3 ; RV32IFD-NEXT: .LBB47_3: # %entry ; RV32IFD-NEXT: xori a3, a3, 1 -; RV32IFD-NEXT: or a3, a3, a0 +; RV32IFD-NEXT: or a3, a3, a1 ; RV32IFD-NEXT: seqz a3, a3 ; RV32IFD-NEXT: addi a3, a3, -1 ; RV32IFD-NEXT: and a3, a3, a4 ; RV32IFD-NEXT: neg a3, a3 ; RV32IFD-NEXT: and a2, a3, a2 -; RV32IFD-NEXT: and a1, a3, a1 ; RV32IFD-NEXT: and a0, a3, a0 -; RV32IFD-NEXT: slti a0, a0, 0 -; RV32IFD-NEXT: addi a3, a0, -1 -; RV32IFD-NEXT: and a0, a3, a1 -; RV32IFD-NEXT: and a1, a3, a2 +; RV32IFD-NEXT: and a1, a3, a1 +; RV32IFD-NEXT: slti a1, a1, 0 +; RV32IFD-NEXT: addi a1, a1, -1 +; RV32IFD-NEXT: and a0, a1, a0 +; RV32IFD-NEXT: and a1, a1, a2 ; RV32IFD-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IFD-NEXT: .cfi_restore ra ; RV32IFD-NEXT: addi sp, sp, 32 @@ -3479,20 +3479,20 @@ define i64 @utest_f32i64_mm(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixunssfti -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lw a1, 20(sp) -; RV32-NEXT: lw a2, 12(sp) -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: xori a0, a0, 1 +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a3, 20(sp) 
+; RV32-NEXT: or a4, a3, a2 +; RV32-NEXT: xori a2, a2, 1 ; RV32-NEXT: seqz a4, a4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a4 -; RV32-NEXT: neg a1, a0 -; RV32-NEXT: and a0, a1, a3 -; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a2, a2, a3 +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 @@ -3530,30 +3530,30 @@ define i64 @ustest_f32i64_mm(float %x) { ; RV32-NEXT: .cfi_offset ra, -4 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a0, 8(sp) ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a0, .LBB50_2 +; RV32-NEXT: lw a1, 20(sp) +; RV32-NEXT: beqz a1, .LBB50_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a4, a0, 0 +; RV32-NEXT: slti a4, a1, 0 ; RV32-NEXT: j .LBB50_3 ; RV32-NEXT: .LBB50_2: ; RV32-NEXT: seqz a4, a3 ; RV32-NEXT: .LBB50_3: # %entry ; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a0 +; RV32-NEXT: or a3, a3, a1 ; RV32-NEXT: seqz a3, a3 ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a3, a3, a4 ; RV32-NEXT: neg a3, a3 ; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: and a1, a3, a1 ; RV32-NEXT: and a0, a3, a0 -; RV32-NEXT: slti a0, a0, 0 -; RV32-NEXT: addi a3, a0, -1 -; RV32-NEXT: and a0, a3, a1 -; RV32-NEXT: and a1, a3, a2 +; RV32-NEXT: and a1, a3, a1 +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 @@ -3714,20 +3714,20 @@ define i64 @utesth_f16i64_mm(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixunssfti -; RV32-NEXT: lw a0, 16(sp) -; RV32-NEXT: lw a1, 20(sp) -; RV32-NEXT: lw a2, 12(sp) -; RV32-NEXT: lw a3, 8(sp) -; RV32-NEXT: or a4, a1, a0 -; RV32-NEXT: xori a0, a0, 1 +; RV32-NEXT: lw a0, 8(sp) +; RV32-NEXT: lw a1, 12(sp) +; RV32-NEXT: lw a2, 16(sp) +; RV32-NEXT: lw a3, 20(sp) +; RV32-NEXT: or a4, a3, a2 +; RV32-NEXT: xori a2, a2, 1 ; RV32-NEXT: seqz a4, a4 -; RV32-NEXT: or a0, a0, a1 -; RV32-NEXT: seqz a0, a0 -; RV32-NEXT: addi a0, a0, -1 -; RV32-NEXT: and a0, a0, a4 -; RV32-NEXT: neg a1, a0 -; RV32-NEXT: and a0, a1, a3 -; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: or a2, a2, a3 +; RV32-NEXT: seqz a2, a2 +; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a2, a2, a4 +; RV32-NEXT: neg a2, a2 +; RV32-NEXT: and a0, a2, a0 +; RV32-NEXT: and a1, a2, a1 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 @@ -3767,30 +3767,30 @@ define i64 @ustest_f16i64_mm(half %x) { ; RV32-NEXT: call __extendhfsf2 ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: call __fixsfti -; RV32-NEXT: lw a0, 20(sp) -; RV32-NEXT: lw a1, 8(sp) +; RV32-NEXT: lw a0, 8(sp) ; RV32-NEXT: lw a2, 12(sp) ; RV32-NEXT: lw a3, 16(sp) -; RV32-NEXT: beqz a0, .LBB53_2 +; RV32-NEXT: lw a1, 20(sp) +; RV32-NEXT: beqz a1, .LBB53_2 ; RV32-NEXT: # %bb.1: # %entry -; RV32-NEXT: slti a4, a0, 0 +; RV32-NEXT: slti a4, a1, 0 ; RV32-NEXT: j .LBB53_3 ; RV32-NEXT: .LBB53_2: ; RV32-NEXT: seqz a4, a3 ; RV32-NEXT: .LBB53_3: # %entry ; RV32-NEXT: xori a3, a3, 1 -; RV32-NEXT: or a3, a3, a0 +; RV32-NEXT: or a3, a3, a1 ; RV32-NEXT: seqz a3, a3 ; RV32-NEXT: addi a3, a3, -1 ; RV32-NEXT: and a3, a3, 
a4 ; RV32-NEXT: neg a3, a3 ; RV32-NEXT: and a2, a3, a2 -; RV32-NEXT: and a1, a3, a1 ; RV32-NEXT: and a0, a3, a0 -; RV32-NEXT: slti a0, a0, 0 -; RV32-NEXT: addi a3, a0, -1 -; RV32-NEXT: and a0, a3, a1 -; RV32-NEXT: and a1, a3, a2 +; RV32-NEXT: and a1, a3, a1 +; RV32-NEXT: slti a1, a1, 0 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore ra ; RV32-NEXT: addi sp, sp, 32 diff --git a/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll b/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll index 9322abcfbbdce..9ca527573e0c6 100644 --- a/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll +++ b/llvm/test/CodeGen/RISCV/get-setcc-result-type.ll @@ -5,22 +5,22 @@ define void @getSetCCResultType(ptr %p, ptr %q) nounwind { ; RV32I-LABEL: getSetCCResultType: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: lw a1, 12(a0) -; RV32I-NEXT: lw a2, 8(a0) -; RV32I-NEXT: lw a3, 4(a0) -; RV32I-NEXT: lw a4, 0(a0) -; RV32I-NEXT: snez a1, a1 -; RV32I-NEXT: snez a2, a2 -; RV32I-NEXT: snez a3, a3 +; RV32I-NEXT: lw a1, 0(a0) +; RV32I-NEXT: lw a2, 4(a0) +; RV32I-NEXT: lw a3, 8(a0) +; RV32I-NEXT: lw a4, 12(a0) ; RV32I-NEXT: snez a4, a4 -; RV32I-NEXT: addi a4, a4, -1 -; RV32I-NEXT: addi a3, a3, -1 -; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: snez a3, a3 +; RV32I-NEXT: snez a2, a2 +; RV32I-NEXT: snez a1, a1 ; RV32I-NEXT: addi a1, a1, -1 -; RV32I-NEXT: sw a4, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: addi a2, a2, -1 +; RV32I-NEXT: addi a3, a3, -1 +; RV32I-NEXT: addi a4, a4, -1 +; RV32I-NEXT: sw a1, 0(a0) +; RV32I-NEXT: sw a2, 4(a0) +; RV32I-NEXT: sw a3, 8(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret entry: %0 = load <4 x i32>, ptr %p, align 16 diff --git a/llvm/test/CodeGen/RISCV/half-arith.ll b/llvm/test/CodeGen/RISCV/half-arith.ll index a218e89948d4b..690bf6c284eb2 100644 --- a/llvm/test/CodeGen/RISCV/half-arith.ll +++ b/llvm/test/CodeGen/RISCV/half-arith.ll @@ -2885,14 +2885,14 @@ define half @fsgnjx_f16(half %x, half %y) nounwind { ; ; RV32IZFHMIN-LABEL: fsgnjx_f16: ; RV32IZFHMIN: # %bb.0: -; RV32IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) -; RV32IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0) -; RV32IZFHMIN-NEXT: fmv.x.h a1, fa0 -; RV32IZFHMIN-NEXT: lui a2, 1048568 -; RV32IZFHMIN-NEXT: and a1, a1, a2 -; RV32IZFHMIN-NEXT: slli a0, a0, 17 -; RV32IZFHMIN-NEXT: srli a0, a0, 17 -; RV32IZFHMIN-NEXT: or a0, a0, a1 +; RV32IZFHMIN-NEXT: fmv.x.h a0, fa0 +; RV32IZFHMIN-NEXT: lui a1, 1048568 +; RV32IZFHMIN-NEXT: and a0, a0, a1 +; RV32IZFHMIN-NEXT: lui a1, %hi(.LCPI23_0) +; RV32IZFHMIN-NEXT: lhu a1, %lo(.LCPI23_0)(a1) +; RV32IZFHMIN-NEXT: slli a1, a1, 17 +; RV32IZFHMIN-NEXT: srli a1, a1, 17 +; RV32IZFHMIN-NEXT: or a0, a1, a0 ; RV32IZFHMIN-NEXT: fmv.h.x fa5, a0 ; RV32IZFHMIN-NEXT: fcvt.s.h fa5, fa5 ; RV32IZFHMIN-NEXT: fcvt.s.h fa4, fa1 @@ -2902,14 +2902,14 @@ define half @fsgnjx_f16(half %x, half %y) nounwind { ; ; RV64IZFHMIN-LABEL: fsgnjx_f16: ; RV64IZFHMIN: # %bb.0: -; RV64IZFHMIN-NEXT: lui a0, %hi(.LCPI23_0) -; RV64IZFHMIN-NEXT: lhu a0, %lo(.LCPI23_0)(a0) -; RV64IZFHMIN-NEXT: fmv.x.h a1, fa0 -; RV64IZFHMIN-NEXT: lui a2, 1048568 -; RV64IZFHMIN-NEXT: and a1, a1, a2 -; RV64IZFHMIN-NEXT: slli a0, a0, 49 -; RV64IZFHMIN-NEXT: srli a0, a0, 49 -; RV64IZFHMIN-NEXT: or a0, a0, a1 +; RV64IZFHMIN-NEXT: fmv.x.h a0, fa0 +; RV64IZFHMIN-NEXT: lui a1, 1048568 +; RV64IZFHMIN-NEXT: and a0, a0, a1 +; RV64IZFHMIN-NEXT: lui a1, %hi(.LCPI23_0) +; RV64IZFHMIN-NEXT: lhu a1, %lo(.LCPI23_0)(a1) +; 
RV64IZFHMIN-NEXT: slli a1, a1, 49 +; RV64IZFHMIN-NEXT: srli a1, a1, 49 +; RV64IZFHMIN-NEXT: or a0, a1, a0 ; RV64IZFHMIN-NEXT: fmv.h.x fa5, a0 ; RV64IZFHMIN-NEXT: fcvt.s.h fa5, fa5 ; RV64IZFHMIN-NEXT: fcvt.s.h fa4, fa1 diff --git a/llvm/test/CodeGen/RISCV/half-convert-strict.ll b/llvm/test/CodeGen/RISCV/half-convert-strict.ll index 0a04d44893e75..5396fab3437c7 100644 --- a/llvm/test/CodeGen/RISCV/half-convert-strict.ll +++ b/llvm/test/CodeGen/RISCV/half-convert-strict.ll @@ -2519,12 +2519,12 @@ define half @fcvt_h_q(fp128 %a) nounwind strictfp { ; RV32IZFH-NEXT: lw a1, 0(a0) ; RV32IZFH-NEXT: lw a2, 4(a0) ; RV32IZFH-NEXT: lw a3, 8(a0) -; RV32IZFH-NEXT: lw a4, 12(a0) -; RV32IZFH-NEXT: addi a0, sp, 8 +; RV32IZFH-NEXT: lw a0, 12(a0) ; RV32IZFH-NEXT: sw a1, 8(sp) ; RV32IZFH-NEXT: sw a2, 12(sp) ; RV32IZFH-NEXT: sw a3, 16(sp) -; RV32IZFH-NEXT: sw a4, 20(sp) +; RV32IZFH-NEXT: sw a0, 20(sp) +; RV32IZFH-NEXT: addi a0, sp, 8 ; RV32IZFH-NEXT: call __trunctfhf2 ; RV32IZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 32 @@ -2546,12 +2546,12 @@ define half @fcvt_h_q(fp128 %a) nounwind strictfp { ; RV32IZHINX-NEXT: lw a1, 0(a0) ; RV32IZHINX-NEXT: lw a2, 4(a0) ; RV32IZHINX-NEXT: lw a3, 8(a0) -; RV32IZHINX-NEXT: lw a4, 12(a0) -; RV32IZHINX-NEXT: addi a0, sp, 8 +; RV32IZHINX-NEXT: lw a0, 12(a0) ; RV32IZHINX-NEXT: sw a1, 8(sp) ; RV32IZHINX-NEXT: sw a2, 12(sp) ; RV32IZHINX-NEXT: sw a3, 16(sp) -; RV32IZHINX-NEXT: sw a4, 20(sp) +; RV32IZHINX-NEXT: sw a0, 20(sp) +; RV32IZHINX-NEXT: addi a0, sp, 8 ; RV32IZHINX-NEXT: call __trunctfhf2 ; RV32IZHINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 32 @@ -2573,12 +2573,12 @@ define half @fcvt_h_q(fp128 %a) nounwind strictfp { ; RV32IDZFH-NEXT: lw a1, 0(a0) ; RV32IDZFH-NEXT: lw a2, 4(a0) ; RV32IDZFH-NEXT: lw a3, 8(a0) -; RV32IDZFH-NEXT: lw a4, 12(a0) -; RV32IDZFH-NEXT: addi a0, sp, 8 +; RV32IDZFH-NEXT: lw a0, 12(a0) ; RV32IDZFH-NEXT: sw a1, 8(sp) ; RV32IDZFH-NEXT: sw a2, 12(sp) ; RV32IDZFH-NEXT: sw a3, 16(sp) -; RV32IDZFH-NEXT: sw a4, 20(sp) +; RV32IDZFH-NEXT: sw a0, 20(sp) +; RV32IDZFH-NEXT: addi a0, sp, 8 ; RV32IDZFH-NEXT: call __trunctfhf2 ; RV32IDZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IDZFH-NEXT: addi sp, sp, 32 @@ -2600,12 +2600,12 @@ define half @fcvt_h_q(fp128 %a) nounwind strictfp { ; RV32IZDINXZHINX-NEXT: lw a1, 0(a0) ; RV32IZDINXZHINX-NEXT: lw a2, 4(a0) ; RV32IZDINXZHINX-NEXT: lw a3, 8(a0) -; RV32IZDINXZHINX-NEXT: lw a4, 12(a0) -; RV32IZDINXZHINX-NEXT: addi a0, sp, 8 +; RV32IZDINXZHINX-NEXT: lw a0, 12(a0) ; RV32IZDINXZHINX-NEXT: sw a1, 8(sp) ; RV32IZDINXZHINX-NEXT: sw a2, 12(sp) ; RV32IZDINXZHINX-NEXT: sw a3, 16(sp) -; RV32IZDINXZHINX-NEXT: sw a4, 20(sp) +; RV32IZDINXZHINX-NEXT: sw a0, 20(sp) +; RV32IZDINXZHINX-NEXT: addi a0, sp, 8 ; RV32IZDINXZHINX-NEXT: call __trunctfhf2 ; RV32IZDINXZHINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32IZDINXZHINX-NEXT: addi sp, sp, 32 @@ -2627,12 +2627,12 @@ define half @fcvt_h_q(fp128 %a) nounwind strictfp { ; CHECK32-IZFHMIN-NEXT: lw a1, 0(a0) ; CHECK32-IZFHMIN-NEXT: lw a2, 4(a0) ; CHECK32-IZFHMIN-NEXT: lw a3, 8(a0) -; CHECK32-IZFHMIN-NEXT: lw a4, 12(a0) -; CHECK32-IZFHMIN-NEXT: addi a0, sp, 8 +; CHECK32-IZFHMIN-NEXT: lw a0, 12(a0) ; CHECK32-IZFHMIN-NEXT: sw a1, 8(sp) ; CHECK32-IZFHMIN-NEXT: sw a2, 12(sp) ; CHECK32-IZFHMIN-NEXT: sw a3, 16(sp) -; CHECK32-IZFHMIN-NEXT: sw a4, 20(sp) +; CHECK32-IZFHMIN-NEXT: sw a0, 20(sp) +; CHECK32-IZFHMIN-NEXT: addi a0, sp, 8 ; CHECK32-IZFHMIN-NEXT: call __trunctfhf2 ; CHECK32-IZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded 
Reload ; CHECK32-IZFHMIN-NEXT: addi sp, sp, 32 @@ -2654,12 +2654,12 @@ define half @fcvt_h_q(fp128 %a) nounwind strictfp { ; CHECK32-IZHINXMIN-NEXT: lw a1, 0(a0) ; CHECK32-IZHINXMIN-NEXT: lw a2, 4(a0) ; CHECK32-IZHINXMIN-NEXT: lw a3, 8(a0) -; CHECK32-IZHINXMIN-NEXT: lw a4, 12(a0) -; CHECK32-IZHINXMIN-NEXT: addi a0, sp, 8 +; CHECK32-IZHINXMIN-NEXT: lw a0, 12(a0) ; CHECK32-IZHINXMIN-NEXT: sw a1, 8(sp) ; CHECK32-IZHINXMIN-NEXT: sw a2, 12(sp) ; CHECK32-IZHINXMIN-NEXT: sw a3, 16(sp) -; CHECK32-IZHINXMIN-NEXT: sw a4, 20(sp) +; CHECK32-IZHINXMIN-NEXT: sw a0, 20(sp) +; CHECK32-IZHINXMIN-NEXT: addi a0, sp, 8 ; CHECK32-IZHINXMIN-NEXT: call __trunctfhf2 ; CHECK32-IZHINXMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK32-IZHINXMIN-NEXT: addi sp, sp, 32 @@ -2681,12 +2681,12 @@ define half @fcvt_h_q(fp128 %a) nounwind strictfp { ; CHECK32-IZDINXZHINXMIN-NEXT: lw a1, 0(a0) ; CHECK32-IZDINXZHINXMIN-NEXT: lw a2, 4(a0) ; CHECK32-IZDINXZHINXMIN-NEXT: lw a3, 8(a0) -; CHECK32-IZDINXZHINXMIN-NEXT: lw a4, 12(a0) -; CHECK32-IZDINXZHINXMIN-NEXT: addi a0, sp, 8 +; CHECK32-IZDINXZHINXMIN-NEXT: lw a0, 12(a0) ; CHECK32-IZDINXZHINXMIN-NEXT: sw a1, 8(sp) ; CHECK32-IZDINXZHINXMIN-NEXT: sw a2, 12(sp) ; CHECK32-IZDINXZHINXMIN-NEXT: sw a3, 16(sp) -; CHECK32-IZDINXZHINXMIN-NEXT: sw a4, 20(sp) +; CHECK32-IZDINXZHINXMIN-NEXT: sw a0, 20(sp) +; CHECK32-IZDINXZHINXMIN-NEXT: addi a0, sp, 8 ; CHECK32-IZDINXZHINXMIN-NEXT: call __trunctfhf2 ; CHECK32-IZDINXZHINXMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK32-IZDINXZHINXMIN-NEXT: addi sp, sp, 32 @@ -2708,12 +2708,12 @@ define half @fcvt_h_q(fp128 %a) nounwind strictfp { ; CHECK32-D-NEXT: lw a1, 0(a0) ; CHECK32-D-NEXT: lw a2, 4(a0) ; CHECK32-D-NEXT: lw a3, 8(a0) -; CHECK32-D-NEXT: lw a4, 12(a0) -; CHECK32-D-NEXT: addi a0, sp, 8 +; CHECK32-D-NEXT: lw a0, 12(a0) ; CHECK32-D-NEXT: sw a1, 8(sp) ; CHECK32-D-NEXT: sw a2, 12(sp) ; CHECK32-D-NEXT: sw a3, 16(sp) -; CHECK32-D-NEXT: sw a4, 20(sp) +; CHECK32-D-NEXT: sw a0, 20(sp) +; CHECK32-D-NEXT: addi a0, sp, 8 ; CHECK32-D-NEXT: call __trunctfhf2 ; CHECK32-D-NEXT: fmv.x.w a0, fa0 ; CHECK32-D-NEXT: lui a1, 1048560 diff --git a/llvm/test/CodeGen/RISCV/half-convert.ll b/llvm/test/CodeGen/RISCV/half-convert.ll index cf57ecd6cd1e4..7841f0209ce24 100644 --- a/llvm/test/CodeGen/RISCV/half-convert.ll +++ b/llvm/test/CodeGen/RISCV/half-convert.ll @@ -194,13 +194,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_si_h_sat: ; RV32IZFH: # %bb.0: # %start ; RV32IZFH-NEXT: fcvt.s.h fa5, fa0 -; RV32IZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV32IZFH-NEXT: feq.s a1, fa5, fa5 -; RV32IZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; RV32IZFH-NEXT: lui a0, 815104 -; RV32IZFH-NEXT: fmv.w.x fa3, a0 -; RV32IZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV32IZFH-NEXT: neg a0, a1 +; RV32IZFH-NEXT: lui a1, %hi(.LCPI1_0) +; RV32IZFH-NEXT: fmv.w.x fa4, a0 +; RV32IZFH-NEXT: feq.s a0, fa5, fa5 +; RV32IZFH-NEXT: neg a0, a0 +; RV32IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; RV32IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IZFH-NEXT: fcvt.w.s a1, fa5, rtz ; RV32IZFH-NEXT: and a0, a0, a1 @@ -209,13 +209,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64IZFH-LABEL: fcvt_si_h_sat: ; RV64IZFH: # %bb.0: # %start ; RV64IZFH-NEXT: fcvt.s.h fa5, fa0 -; RV64IZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV64IZFH-NEXT: feq.s a1, fa5, fa5 -; RV64IZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; RV64IZFH-NEXT: lui a0, 815104 -; RV64IZFH-NEXT: fmv.w.x fa3, a0 -; RV64IZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV64IZFH-NEXT: neg a0, a1 +; RV64IZFH-NEXT: lui a1, %hi(.LCPI1_0) 
+; RV64IZFH-NEXT: fmv.w.x fa4, a0 +; RV64IZFH-NEXT: feq.s a0, fa5, fa5 +; RV64IZFH-NEXT: neg a0, a0 +; RV64IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; RV64IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IZFH-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IZFH-NEXT: and a0, a0, a1 @@ -224,13 +224,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV32IDZFH-LABEL: fcvt_si_h_sat: ; RV32IDZFH: # %bb.0: # %start ; RV32IDZFH-NEXT: fcvt.s.h fa5, fa0 -; RV32IDZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV32IDZFH-NEXT: feq.s a1, fa5, fa5 -; RV32IDZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; RV32IDZFH-NEXT: lui a0, 815104 -; RV32IDZFH-NEXT: fmv.w.x fa3, a0 -; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV32IDZFH-NEXT: neg a0, a1 +; RV32IDZFH-NEXT: lui a1, %hi(.LCPI1_0) +; RV32IDZFH-NEXT: fmv.w.x fa4, a0 +; RV32IDZFH-NEXT: feq.s a0, fa5, fa5 +; RV32IDZFH-NEXT: neg a0, a0 +; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IDZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; RV32IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IDZFH-NEXT: fcvt.w.s a1, fa5, rtz ; RV32IDZFH-NEXT: and a0, a0, a1 @@ -239,13 +239,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64IDZFH-LABEL: fcvt_si_h_sat: ; RV64IDZFH: # %bb.0: # %start ; RV64IDZFH-NEXT: fcvt.s.h fa5, fa0 -; RV64IDZFH-NEXT: lui a0, %hi(.LCPI1_0) -; RV64IDZFH-NEXT: feq.s a1, fa5, fa5 -; RV64IDZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; RV64IDZFH-NEXT: lui a0, 815104 -; RV64IDZFH-NEXT: fmv.w.x fa3, a0 -; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV64IDZFH-NEXT: neg a0, a1 +; RV64IDZFH-NEXT: lui a1, %hi(.LCPI1_0) +; RV64IDZFH-NEXT: fmv.w.x fa4, a0 +; RV64IDZFH-NEXT: feq.s a0, fa5, fa5 +; RV64IDZFH-NEXT: neg a0, a0 +; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IDZFH-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; RV64IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IDZFH-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IDZFH-NEXT: and a0, a0, a1 @@ -399,13 +399,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: call __extendhfsf2 ; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 -; RV32ID-ILP32-NEXT: lui a0, %hi(.LCPI1_0) -; RV32ID-ILP32-NEXT: feq.s a1, fa5, fa5 -; RV32ID-ILP32-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; RV32ID-ILP32-NEXT: lui a0, 815104 -; RV32ID-ILP32-NEXT: fmv.w.x fa3, a0 -; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa3 -; RV32ID-ILP32-NEXT: neg a0, a1 +; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI1_0) +; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 +; RV32ID-ILP32-NEXT: feq.s a0, fa5, fa5 +; RV32ID-ILP32-NEXT: neg a0, a0 +; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa4 +; RV32ID-ILP32-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; RV32ID-ILP32-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-ILP32-NEXT: fcvt.w.s a1, fa5, rtz ; RV32ID-ILP32-NEXT: and a0, a0, a1 @@ -419,13 +419,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-LP64-NEXT: call __extendhfsf2 ; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 -; RV64ID-LP64-NEXT: lui a0, %hi(.LCPI1_0) -; RV64ID-LP64-NEXT: feq.s a1, fa5, fa5 -; RV64ID-LP64-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; RV64ID-LP64-NEXT: lui a0, 815104 -; RV64ID-LP64-NEXT: fmv.w.x fa3, a0 -; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa3 -; RV64ID-LP64-NEXT: neg a0, a1 +; RV64ID-LP64-NEXT: lui a1, %hi(.LCPI1_0) +; RV64ID-LP64-NEXT: fmv.w.x fa4, a0 +; RV64ID-LP64-NEXT: feq.s a0, fa5, fa5 +; RV64ID-LP64-NEXT: neg a0, a0 +; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa4 +; RV64ID-LP64-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; RV64ID-LP64-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-LP64-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-LP64-NEXT: and a0, a0, a1 @@ -439,13 +439,13 @@ 
define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 ; RV32ID-NEXT: feq.s a0, fa0, fa0 -; RV32ID-NEXT: lui a1, %hi(.LCPI1_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI1_0)(a1) ; RV32ID-NEXT: lui a1, 815104 -; RV32ID-NEXT: fmv.w.x fa4, a1 -; RV32ID-NEXT: fmax.s fa4, fa0, fa4 +; RV32ID-NEXT: fmv.w.x fa5, a1 +; RV32ID-NEXT: lui a1, %hi(.LCPI1_0) ; RV32ID-NEXT: neg a0, a0 -; RV32ID-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-NEXT: fmax.s fa5, fa0, fa5 +; RV32ID-NEXT: flw fa4, %lo(.LCPI1_0)(a1) +; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz ; RV32ID-NEXT: and a0, a0, a1 ; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -458,13 +458,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-NEXT: call __extendhfsf2 ; RV64ID-NEXT: feq.s a0, fa0, fa0 -; RV64ID-NEXT: lui a1, %hi(.LCPI1_0) -; RV64ID-NEXT: flw fa5, %lo(.LCPI1_0)(a1) ; RV64ID-NEXT: lui a1, 815104 -; RV64ID-NEXT: fmv.w.x fa4, a1 -; RV64ID-NEXT: fmax.s fa4, fa0, fa4 +; RV64ID-NEXT: fmv.w.x fa5, a1 +; RV64ID-NEXT: lui a1, %hi(.LCPI1_0) ; RV64ID-NEXT: neg a0, a0 -; RV64ID-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-NEXT: fmax.s fa5, fa0, fa5 +; RV64ID-NEXT: flw fa4, %lo(.LCPI1_0)(a1) +; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -474,13 +474,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; CHECK32-IZFHMIN-LABEL: fcvt_si_h_sat: ; CHECK32-IZFHMIN: # %bb.0: # %start ; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 -; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK32-IZFHMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK32-IZFHMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; CHECK32-IZFHMIN-NEXT: lui a0, 815104 -; CHECK32-IZFHMIN-NEXT: fmv.w.x fa3, a0 -; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK32-IZFHMIN-NEXT: neg a0, a1 +; CHECK32-IZFHMIN-NEXT: lui a1, %hi(.LCPI1_0) +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECK32-IZFHMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK32-IZFHMIN-NEXT: neg a0, a0 +; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK32-IZFHMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32-IZFHMIN-NEXT: fcvt.w.s a1, fa5, rtz ; CHECK32-IZFHMIN-NEXT: and a0, a0, a1 @@ -489,13 +489,13 @@ define i16 @fcvt_si_h_sat(half %a) nounwind { ; CHECK64-IZFHMIN-LABEL: fcvt_si_h_sat: ; CHECK64-IZFHMIN: # %bb.0: # %start ; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 -; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK64-IZFHMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK64-IZFHMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a0) ; CHECK64-IZFHMIN-NEXT: lui a0, 815104 -; CHECK64-IZFHMIN-NEXT: fmv.w.x fa3, a0 -; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK64-IZFHMIN-NEXT: neg a0, a1 +; CHECK64-IZFHMIN-NEXT: lui a1, %hi(.LCPI1_0) +; CHECK64-IZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECK64-IZFHMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64-IZFHMIN-NEXT: neg a0, a0 +; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK64-IZFHMIN-NEXT: flw fa4, %lo(.LCPI1_0)(a1) ; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64-IZFHMIN-NEXT: fcvt.l.s a1, fa5, rtz ; CHECK64-IZFHMIN-NEXT: and a0, a0, a1 @@ -711,45 +711,45 @@ define i16 @fcvt_ui_h(half %a) nounwind { define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_ui_h_sat: ; RV32IZFH: # %bb.0: # %start +; RV32IZFH-NEXT: fcvt.s.h fa5, fa0 +; RV32IZFH-NEXT: fmv.w.x fa4, zero ; RV32IZFH-NEXT: lui a0, %hi(.LCPI3_0) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI3_0)(a0) 
-; RV32IZFH-NEXT: fcvt.s.h fa4, fa0 -; RV32IZFH-NEXT: fmv.w.x fa3, zero -; RV32IZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV32IZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV32IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IZFH-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; RV32IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IZFH-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_ui_h_sat: ; RV64IZFH: # %bb.0: # %start +; RV64IZFH-NEXT: fcvt.s.h fa5, fa0 +; RV64IZFH-NEXT: fmv.w.x fa4, zero ; RV64IZFH-NEXT: lui a0, %hi(.LCPI3_0) -; RV64IZFH-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV64IZFH-NEXT: fcvt.s.h fa4, fa0 -; RV64IZFH-NEXT: fmv.w.x fa3, zero -; RV64IZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV64IZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV64IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IZFH-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; RV64IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IZFH-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_ui_h_sat: ; RV32IDZFH: # %bb.0: # %start +; RV32IDZFH-NEXT: fcvt.s.h fa5, fa0 +; RV32IDZFH-NEXT: fmv.w.x fa4, zero ; RV32IDZFH-NEXT: lui a0, %hi(.LCPI3_0) -; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV32IDZFH-NEXT: fcvt.s.h fa4, fa0 -; RV32IDZFH-NEXT: fmv.w.x fa3, zero -; RV32IDZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV32IDZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IDZFH-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; RV32IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IDZFH-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_ui_h_sat: ; RV64IDZFH: # %bb.0: # %start +; RV64IDZFH-NEXT: fcvt.s.h fa5, fa0 +; RV64IDZFH-NEXT: fmv.w.x fa4, zero ; RV64IDZFH-NEXT: lui a0, %hi(.LCPI3_0) -; RV64IDZFH-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV64IDZFH-NEXT: fcvt.s.h fa4, fa0 -; RV64IDZFH-NEXT: fmv.w.x fa3, zero -; RV64IDZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV64IDZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IDZFH-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; RV64IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IDZFH-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64IDZFH-NEXT: ret ; @@ -874,12 +874,12 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV32ID-ILP32-NEXT: addi sp, sp, -16 ; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: call __extendhfsf2 -; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI3_0) -; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI3_0)(a1) -; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 -; RV32ID-ILP32-NEXT: fmv.w.x fa3, zero -; RV32ID-ILP32-NEXT: fmax.s fa4, fa4, fa3 -; RV32ID-ILP32-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, zero +; RV32ID-ILP32-NEXT: lui a0, %hi(.LCPI3_0) +; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa4 +; RV32ID-ILP32-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; RV32ID-ILP32-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-ILP32-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32ID-ILP32-NEXT: addi sp, sp, 16 @@ -890,12 +890,12 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV64ID-LP64-NEXT: addi sp, sp, -16 ; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-LP64-NEXT: call __extendhfsf2 -; RV64ID-LP64-NEXT: lui a1, %hi(.LCPI3_0) -; RV64ID-LP64-NEXT: flw fa5, %lo(.LCPI3_0)(a1) -; RV64ID-LP64-NEXT: fmv.w.x fa4, a0 -; RV64ID-LP64-NEXT: fmv.w.x fa3, zero -; RV64ID-LP64-NEXT: fmax.s fa4, fa4, fa3 -; RV64ID-LP64-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 +; RV64ID-LP64-NEXT: fmv.w.x fa4, zero +; RV64ID-LP64-NEXT: lui a0, %hi(.LCPI3_0) +; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa4 +; RV64ID-LP64-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; 
RV64ID-LP64-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-LP64-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64ID-LP64-NEXT: addi sp, sp, 16 @@ -906,11 +906,11 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV32ID-NEXT: addi sp, sp, -16 ; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 +; RV32ID-NEXT: fmv.w.x fa5, zero ; RV32ID-NEXT: lui a0, %hi(.LCPI3_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV32ID-NEXT: fmv.w.x fa4, zero -; RV32ID-NEXT: fmax.s fa4, fa0, fa4 -; RV32ID-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-NEXT: fmax.s fa5, fa0, fa5 +; RV32ID-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32ID-NEXT: addi sp, sp, 16 @@ -921,11 +921,11 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; RV64ID-NEXT: addi sp, sp, -16 ; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-NEXT: call __extendhfsf2 +; RV64ID-NEXT: fmv.w.x fa5, zero ; RV64ID-NEXT: lui a0, %hi(.LCPI3_0) -; RV64ID-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; RV64ID-NEXT: fmv.w.x fa4, zero -; RV64ID-NEXT: fmax.s fa4, fa0, fa4 -; RV64ID-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-NEXT: fmax.s fa5, fa0, fa5 +; RV64ID-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64ID-NEXT: addi sp, sp, 16 @@ -933,23 +933,23 @@ define i16 @fcvt_ui_h_sat(half %a) nounwind { ; ; CHECK32-IZFHMIN-LABEL: fcvt_ui_h_sat: ; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, zero ; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK32-IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa4, fa0 -; CHECK32-IZFHMIN-NEXT: fmv.w.x fa3, zero -; CHECK32-IZFHMIN-NEXT: fmax.s fa4, fa4, fa3 -; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa4, fa5 +; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK32-IZFHMIN-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; CHECK32-IZFHMIN-NEXT: ret ; ; CHECK64-IZFHMIN-LABEL: fcvt_ui_h_sat: ; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; CHECK64-IZFHMIN-NEXT: fmv.w.x fa4, zero ; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK64-IZFHMIN-NEXT: flw fa5, %lo(.LCPI3_0)(a0) -; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa4, fa0 -; CHECK64-IZFHMIN-NEXT: fmv.w.x fa3, zero -; CHECK64-IZFHMIN-NEXT: fmax.s fa4, fa4, fa3 -; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa4, fa5 +; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK64-IZFHMIN-NEXT: flw fa4, %lo(.LCPI3_0)(a0) +; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, fa5, rtz ; CHECK64-IZFHMIN-NEXT: ret ; @@ -2904,14 +2904,14 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 ; RV32IZFH-NEXT: lui a0, %hi(.LCPI12_0) +; RV32IZFH-NEXT: fmv.w.x fa5, zero +; RV32IZFH-NEXT: fle.s a1, fa5, fa0 ; RV32IZFH-NEXT: flw fa5, %lo(.LCPI12_0)(a0) -; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IZFH-NEXT: fmv.w.x fa4, zero -; RV32IZFH-NEXT: fle.s a0, fa4, fa0 -; RV32IZFH-NEXT: flt.s a1, fa5, fa0 -; RV32IZFH-NEXT: neg s0, a1 -; RV32IZFH-NEXT: neg s1, a0 +; RV32IZFH-NEXT: flt.s a0, fa5, fa0 +; RV32IZFH-NEXT: neg s0, a0 +; 
RV32IZFH-NEXT: neg s1, a1 ; RV32IZFH-NEXT: call __fixunssfdi ; RV32IZFH-NEXT: and a0, s1, a0 ; RV32IZFH-NEXT: and a1, s1, a1 @@ -2938,14 +2938,14 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32IDZFH-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32IDZFH-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32IDZFH-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0 ; RV32IDZFH-NEXT: lui a0, %hi(.LCPI12_0) +; RV32IDZFH-NEXT: fmv.w.x fa5, zero +; RV32IDZFH-NEXT: fle.s a1, fa5, fa0 ; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI12_0)(a0) -; RV32IDZFH-NEXT: fcvt.s.h fa0, fa0 -; RV32IDZFH-NEXT: fmv.w.x fa4, zero -; RV32IDZFH-NEXT: fle.s a0, fa4, fa0 -; RV32IDZFH-NEXT: flt.s a1, fa5, fa0 -; RV32IDZFH-NEXT: neg s0, a1 -; RV32IDZFH-NEXT: neg s1, a0 +; RV32IDZFH-NEXT: flt.s a0, fa5, fa0 +; RV32IDZFH-NEXT: neg s0, a0 +; RV32IDZFH-NEXT: neg s1, a1 ; RV32IDZFH-NEXT: call __fixunssfdi ; RV32IDZFH-NEXT: and a0, s1, a0 ; RV32IDZFH-NEXT: and a1, s1, a1 @@ -3103,14 +3103,14 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32ID-ILP32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: call __extendhfsf2 +; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 ; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI12_0) -; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI12_0)(a1) -; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 -; RV32ID-ILP32-NEXT: fmv.w.x fa3, zero -; RV32ID-ILP32-NEXT: fle.s a1, fa3, fa4 -; RV32ID-ILP32-NEXT: flt.s a2, fa5, fa4 -; RV32ID-ILP32-NEXT: neg s0, a2 -; RV32ID-ILP32-NEXT: neg s1, a1 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, zero +; RV32ID-ILP32-NEXT: fle.s a2, fa4, fa5 +; RV32ID-ILP32-NEXT: flw fa4, %lo(.LCPI12_0)(a1) +; RV32ID-ILP32-NEXT: flt.s a1, fa4, fa5 +; RV32ID-ILP32-NEXT: neg s0, a1 +; RV32ID-ILP32-NEXT: neg s1, a2 ; RV32ID-ILP32-NEXT: call __fixunssfdi ; RV32ID-ILP32-NEXT: and a0, s1, a0 ; RV32ID-ILP32-NEXT: and a1, s1, a1 @@ -3145,12 +3145,12 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; RV32ID-NEXT: sw s1, 4(sp) # 4-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 ; RV32ID-NEXT: lui a0, %hi(.LCPI12_0) +; RV32ID-NEXT: fmv.w.x fa5, zero +; RV32ID-NEXT: fle.s a1, fa5, fa0 ; RV32ID-NEXT: flw fa5, %lo(.LCPI12_0)(a0) -; RV32ID-NEXT: fmv.w.x fa4, zero -; RV32ID-NEXT: fle.s a0, fa4, fa0 -; RV32ID-NEXT: flt.s a1, fa5, fa0 -; RV32ID-NEXT: neg s0, a1 -; RV32ID-NEXT: neg s1, a0 +; RV32ID-NEXT: flt.s a0, fa5, fa0 +; RV32ID-NEXT: neg s0, a0 +; RV32ID-NEXT: neg s1, a1 ; RV32ID-NEXT: call __fixunssfdi ; RV32ID-NEXT: and a0, s1, a0 ; RV32ID-NEXT: and a1, s1, a1 @@ -3182,14 +3182,14 @@ define i64 @fcvt_lu_h_sat(half %a) nounwind { ; CHECK32-IZFHMIN-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; CHECK32-IZFHMIN-NEXT: sw s0, 8(sp) # 4-byte Folded Spill ; CHECK32-IZFHMIN-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa0, fa0 ; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa5, zero +; CHECK32-IZFHMIN-NEXT: fle.s a1, fa5, fa0 ; CHECK32-IZFHMIN-NEXT: flw fa5, %lo(.LCPI12_0)(a0) -; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa0, fa0 -; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, zero -; CHECK32-IZFHMIN-NEXT: fle.s a0, fa4, fa0 -; CHECK32-IZFHMIN-NEXT: flt.s a1, fa5, fa0 -; CHECK32-IZFHMIN-NEXT: neg s0, a1 -; CHECK32-IZFHMIN-NEXT: neg s1, a0 +; CHECK32-IZFHMIN-NEXT: flt.s a0, fa5, fa0 +; CHECK32-IZFHMIN-NEXT: neg s0, a0 +; CHECK32-IZFHMIN-NEXT: neg s1, a1 ; CHECK32-IZFHMIN-NEXT: call __fixunssfdi ; CHECK32-IZFHMIN-NEXT: and a0, s1, a0 ; CHECK32-IZFHMIN-NEXT: and a1, s1, a1 @@ -6296,13 +6296,13 @@ define signext i16 
@fcvt_w_s_sat_i16(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_w_s_sat_i16: ; RV32IZFH: # %bb.0: # %start ; RV32IZFH-NEXT: fcvt.s.h fa5, fa0 -; RV32IZFH-NEXT: lui a0, %hi(.LCPI32_0) -; RV32IZFH-NEXT: feq.s a1, fa5, fa5 -; RV32IZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; RV32IZFH-NEXT: lui a0, 815104 -; RV32IZFH-NEXT: fmv.w.x fa3, a0 -; RV32IZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV32IZFH-NEXT: neg a0, a1 +; RV32IZFH-NEXT: lui a1, %hi(.LCPI32_0) +; RV32IZFH-NEXT: fmv.w.x fa4, a0 +; RV32IZFH-NEXT: feq.s a0, fa5, fa5 +; RV32IZFH-NEXT: neg a0, a0 +; RV32IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a1) ; RV32IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IZFH-NEXT: fcvt.w.s a1, fa5, rtz ; RV32IZFH-NEXT: and a0, a0, a1 @@ -6311,13 +6311,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64IZFH-LABEL: fcvt_w_s_sat_i16: ; RV64IZFH: # %bb.0: # %start ; RV64IZFH-NEXT: fcvt.s.h fa5, fa0 -; RV64IZFH-NEXT: lui a0, %hi(.LCPI32_0) -; RV64IZFH-NEXT: feq.s a1, fa5, fa5 -; RV64IZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; RV64IZFH-NEXT: lui a0, 815104 -; RV64IZFH-NEXT: fmv.w.x fa3, a0 -; RV64IZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV64IZFH-NEXT: neg a0, a1 +; RV64IZFH-NEXT: lui a1, %hi(.LCPI32_0) +; RV64IZFH-NEXT: fmv.w.x fa4, a0 +; RV64IZFH-NEXT: feq.s a0, fa5, fa5 +; RV64IZFH-NEXT: neg a0, a0 +; RV64IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a1) ; RV64IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IZFH-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IZFH-NEXT: and a0, a0, a1 @@ -6326,13 +6326,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV32IDZFH-LABEL: fcvt_w_s_sat_i16: ; RV32IDZFH: # %bb.0: # %start ; RV32IDZFH-NEXT: fcvt.s.h fa5, fa0 -; RV32IDZFH-NEXT: lui a0, %hi(.LCPI32_0) -; RV32IDZFH-NEXT: feq.s a1, fa5, fa5 -; RV32IDZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; RV32IDZFH-NEXT: lui a0, 815104 -; RV32IDZFH-NEXT: fmv.w.x fa3, a0 -; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV32IDZFH-NEXT: neg a0, a1 +; RV32IDZFH-NEXT: lui a1, %hi(.LCPI32_0) +; RV32IDZFH-NEXT: fmv.w.x fa4, a0 +; RV32IDZFH-NEXT: feq.s a0, fa5, fa5 +; RV32IDZFH-NEXT: neg a0, a0 +; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IDZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a1) ; RV32IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IDZFH-NEXT: fcvt.w.s a1, fa5, rtz ; RV32IDZFH-NEXT: and a0, a0, a1 @@ -6341,13 +6341,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64IDZFH-LABEL: fcvt_w_s_sat_i16: ; RV64IDZFH: # %bb.0: # %start ; RV64IDZFH-NEXT: fcvt.s.h fa5, fa0 -; RV64IDZFH-NEXT: lui a0, %hi(.LCPI32_0) -; RV64IDZFH-NEXT: feq.s a1, fa5, fa5 -; RV64IDZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; RV64IDZFH-NEXT: lui a0, 815104 -; RV64IDZFH-NEXT: fmv.w.x fa3, a0 -; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa3 -; RV64IDZFH-NEXT: neg a0, a1 +; RV64IDZFH-NEXT: lui a1, %hi(.LCPI32_0) +; RV64IDZFH-NEXT: fmv.w.x fa4, a0 +; RV64IDZFH-NEXT: feq.s a0, fa5, fa5 +; RV64IDZFH-NEXT: neg a0, a0 +; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IDZFH-NEXT: flw fa4, %lo(.LCPI32_0)(a1) ; RV64IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IDZFH-NEXT: fcvt.l.s a1, fa5, rtz ; RV64IDZFH-NEXT: and a0, a0, a1 @@ -6505,13 +6505,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: call __extendhfsf2 ; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 -; RV32ID-ILP32-NEXT: lui a0, %hi(.LCPI32_0) -; RV32ID-ILP32-NEXT: feq.s a1, fa5, fa5 -; RV32ID-ILP32-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; RV32ID-ILP32-NEXT: lui a0, 815104 -; RV32ID-ILP32-NEXT: fmv.w.x fa3, a0 -; RV32ID-ILP32-NEXT: fmax.s fa5, 
fa5, fa3 -; RV32ID-ILP32-NEXT: neg a0, a1 +; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI32_0) +; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 +; RV32ID-ILP32-NEXT: feq.s a0, fa5, fa5 +; RV32ID-ILP32-NEXT: neg a0, a0 +; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa4 +; RV32ID-ILP32-NEXT: flw fa4, %lo(.LCPI32_0)(a1) ; RV32ID-ILP32-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-ILP32-NEXT: fcvt.w.s a1, fa5, rtz ; RV32ID-ILP32-NEXT: and a0, a0, a1 @@ -6525,13 +6525,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-LP64-NEXT: call __extendhfsf2 ; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 -; RV64ID-LP64-NEXT: lui a0, %hi(.LCPI32_0) -; RV64ID-LP64-NEXT: feq.s a1, fa5, fa5 -; RV64ID-LP64-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; RV64ID-LP64-NEXT: lui a0, 815104 -; RV64ID-LP64-NEXT: fmv.w.x fa3, a0 -; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa3 -; RV64ID-LP64-NEXT: neg a0, a1 +; RV64ID-LP64-NEXT: lui a1, %hi(.LCPI32_0) +; RV64ID-LP64-NEXT: fmv.w.x fa4, a0 +; RV64ID-LP64-NEXT: feq.s a0, fa5, fa5 +; RV64ID-LP64-NEXT: neg a0, a0 +; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa4 +; RV64ID-LP64-NEXT: flw fa4, %lo(.LCPI32_0)(a1) ; RV64ID-LP64-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-LP64-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-LP64-NEXT: and a0, a0, a1 @@ -6545,13 +6545,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 ; RV32ID-NEXT: feq.s a0, fa0, fa0 -; RV32ID-NEXT: lui a1, %hi(.LCPI32_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI32_0)(a1) ; RV32ID-NEXT: lui a1, 815104 -; RV32ID-NEXT: fmv.w.x fa4, a1 -; RV32ID-NEXT: fmax.s fa4, fa0, fa4 +; RV32ID-NEXT: fmv.w.x fa5, a1 +; RV32ID-NEXT: lui a1, %hi(.LCPI32_0) ; RV32ID-NEXT: neg a0, a0 -; RV32ID-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-NEXT: fmax.s fa5, fa0, fa5 +; RV32ID-NEXT: flw fa4, %lo(.LCPI32_0)(a1) +; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.w.s a1, fa5, rtz ; RV32ID-NEXT: and a0, a0, a1 ; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload @@ -6564,13 +6564,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-NEXT: call __extendhfsf2 ; RV64ID-NEXT: feq.s a0, fa0, fa0 -; RV64ID-NEXT: lui a1, %hi(.LCPI32_0) -; RV64ID-NEXT: flw fa5, %lo(.LCPI32_0)(a1) ; RV64ID-NEXT: lui a1, 815104 -; RV64ID-NEXT: fmv.w.x fa4, a1 -; RV64ID-NEXT: fmax.s fa4, fa0, fa4 +; RV64ID-NEXT: fmv.w.x fa5, a1 +; RV64ID-NEXT: lui a1, %hi(.LCPI32_0) ; RV64ID-NEXT: neg a0, a0 -; RV64ID-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-NEXT: fmax.s fa5, fa0, fa5 +; RV64ID-NEXT: flw fa4, %lo(.LCPI32_0)(a1) +; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.l.s a1, fa5, rtz ; RV64ID-NEXT: and a0, a0, a1 ; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -6580,13 +6580,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; CHECK32-IZFHMIN-LABEL: fcvt_w_s_sat_i16: ; CHECK32-IZFHMIN: # %bb.0: # %start ; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 -; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI32_0) -; CHECK32-IZFHMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK32-IZFHMIN-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; CHECK32-IZFHMIN-NEXT: lui a0, 815104 -; CHECK32-IZFHMIN-NEXT: fmv.w.x fa3, a0 -; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK32-IZFHMIN-NEXT: neg a0, a1 +; CHECK32-IZFHMIN-NEXT: lui a1, %hi(.LCPI32_0) +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECK32-IZFHMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK32-IZFHMIN-NEXT: neg a0, a0 +; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK32-IZFHMIN-NEXT: flw fa4, %lo(.LCPI32_0)(a1) ; 
CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32-IZFHMIN-NEXT: fcvt.w.s a1, fa5, rtz ; CHECK32-IZFHMIN-NEXT: and a0, a0, a1 @@ -6595,13 +6595,13 @@ define signext i16 @fcvt_w_s_sat_i16(half %a) nounwind { ; CHECK64-IZFHMIN-LABEL: fcvt_w_s_sat_i16: ; CHECK64-IZFHMIN: # %bb.0: # %start ; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 -; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI32_0) -; CHECK64-IZFHMIN-NEXT: feq.s a1, fa5, fa5 -; CHECK64-IZFHMIN-NEXT: flw fa4, %lo(.LCPI32_0)(a0) ; CHECK64-IZFHMIN-NEXT: lui a0, 815104 -; CHECK64-IZFHMIN-NEXT: fmv.w.x fa3, a0 -; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa3 -; CHECK64-IZFHMIN-NEXT: neg a0, a1 +; CHECK64-IZFHMIN-NEXT: lui a1, %hi(.LCPI32_0) +; CHECK64-IZFHMIN-NEXT: fmv.w.x fa4, a0 +; CHECK64-IZFHMIN-NEXT: feq.s a0, fa5, fa5 +; CHECK64-IZFHMIN-NEXT: neg a0, a0 +; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK64-IZFHMIN-NEXT: flw fa4, %lo(.LCPI32_0)(a1) ; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64-IZFHMIN-NEXT: fcvt.l.s a1, fa5, rtz ; CHECK64-IZFHMIN-NEXT: and a0, a0, a1 @@ -6816,45 +6816,45 @@ define zeroext i16 @fcvt_wu_s_i16(half %a) nounwind { define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind { ; RV32IZFH-LABEL: fcvt_wu_s_sat_i16: ; RV32IZFH: # %bb.0: # %start +; RV32IZFH-NEXT: fcvt.s.h fa5, fa0 +; RV32IZFH-NEXT: fmv.w.x fa4, zero ; RV32IZFH-NEXT: lui a0, %hi(.LCPI34_0) -; RV32IZFH-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; RV32IZFH-NEXT: fcvt.s.h fa4, fa0 -; RV32IZFH-NEXT: fmv.w.x fa3, zero -; RV32IZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV32IZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV32IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IZFH-NEXT: flw fa4, %lo(.LCPI34_0)(a0) +; RV32IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IZFH-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32IZFH-NEXT: ret ; ; RV64IZFH-LABEL: fcvt_wu_s_sat_i16: ; RV64IZFH: # %bb.0: # %start +; RV64IZFH-NEXT: fcvt.s.h fa5, fa0 +; RV64IZFH-NEXT: fmv.w.x fa4, zero ; RV64IZFH-NEXT: lui a0, %hi(.LCPI34_0) -; RV64IZFH-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; RV64IZFH-NEXT: fcvt.s.h fa4, fa0 -; RV64IZFH-NEXT: fmv.w.x fa3, zero -; RV64IZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV64IZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV64IZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IZFH-NEXT: flw fa4, %lo(.LCPI34_0)(a0) +; RV64IZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IZFH-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64IZFH-NEXT: ret ; ; RV32IDZFH-LABEL: fcvt_wu_s_sat_i16: ; RV32IDZFH: # %bb.0: # %start +; RV32IDZFH-NEXT: fcvt.s.h fa5, fa0 +; RV32IDZFH-NEXT: fmv.w.x fa4, zero ; RV32IDZFH-NEXT: lui a0, %hi(.LCPI34_0) -; RV32IDZFH-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; RV32IDZFH-NEXT: fcvt.s.h fa4, fa0 -; RV32IDZFH-NEXT: fmv.w.x fa3, zero -; RV32IDZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV32IDZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV32IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV32IDZFH-NEXT: flw fa4, %lo(.LCPI34_0)(a0) +; RV32IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV32IDZFH-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32IDZFH-NEXT: ret ; ; RV64IDZFH-LABEL: fcvt_wu_s_sat_i16: ; RV64IDZFH: # %bb.0: # %start +; RV64IDZFH-NEXT: fcvt.s.h fa5, fa0 +; RV64IDZFH-NEXT: fmv.w.x fa4, zero ; RV64IDZFH-NEXT: lui a0, %hi(.LCPI34_0) -; RV64IDZFH-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; RV64IDZFH-NEXT: fcvt.s.h fa4, fa0 -; RV64IDZFH-NEXT: fmv.w.x fa3, zero -; RV64IDZFH-NEXT: fmax.s fa4, fa4, fa3 -; RV64IDZFH-NEXT: fmin.s fa5, fa4, fa5 +; RV64IDZFH-NEXT: fmax.s fa5, fa5, fa4 +; RV64IDZFH-NEXT: flw fa4, %lo(.LCPI34_0)(a0) +; RV64IDZFH-NEXT: fmin.s fa5, fa5, fa4 ; RV64IDZFH-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64IDZFH-NEXT: ret ; @@ -6985,12 +6985,12 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind { ; RV32ID-ILP32-NEXT: 
addi sp, sp, -16 ; RV32ID-ILP32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: call __extendhfsf2 -; RV32ID-ILP32-NEXT: lui a1, %hi(.LCPI34_0) -; RV32ID-ILP32-NEXT: flw fa5, %lo(.LCPI34_0)(a1) -; RV32ID-ILP32-NEXT: fmv.w.x fa4, a0 -; RV32ID-ILP32-NEXT: fmv.w.x fa3, zero -; RV32ID-ILP32-NEXT: fmax.s fa4, fa4, fa3 -; RV32ID-ILP32-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-ILP32-NEXT: fmv.w.x fa5, a0 +; RV32ID-ILP32-NEXT: fmv.w.x fa4, zero +; RV32ID-ILP32-NEXT: lui a0, %hi(.LCPI34_0) +; RV32ID-ILP32-NEXT: fmax.s fa5, fa5, fa4 +; RV32ID-ILP32-NEXT: flw fa4, %lo(.LCPI34_0)(a0) +; RV32ID-ILP32-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-ILP32-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32ID-ILP32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32ID-ILP32-NEXT: addi sp, sp, 16 @@ -7001,12 +7001,12 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind { ; RV64ID-LP64-NEXT: addi sp, sp, -16 ; RV64ID-LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-LP64-NEXT: call __extendhfsf2 -; RV64ID-LP64-NEXT: lui a1, %hi(.LCPI34_0) -; RV64ID-LP64-NEXT: flw fa5, %lo(.LCPI34_0)(a1) -; RV64ID-LP64-NEXT: fmv.w.x fa4, a0 -; RV64ID-LP64-NEXT: fmv.w.x fa3, zero -; RV64ID-LP64-NEXT: fmax.s fa4, fa4, fa3 -; RV64ID-LP64-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-LP64-NEXT: fmv.w.x fa5, a0 +; RV64ID-LP64-NEXT: fmv.w.x fa4, zero +; RV64ID-LP64-NEXT: lui a0, %hi(.LCPI34_0) +; RV64ID-LP64-NEXT: fmax.s fa5, fa5, fa4 +; RV64ID-LP64-NEXT: flw fa4, %lo(.LCPI34_0)(a0) +; RV64ID-LP64-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-LP64-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64ID-LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64ID-LP64-NEXT: addi sp, sp, 16 @@ -7017,11 +7017,11 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind { ; RV32ID-NEXT: addi sp, sp, -16 ; RV32ID-NEXT: sw ra, 12(sp) # 4-byte Folded Spill ; RV32ID-NEXT: call __extendhfsf2 +; RV32ID-NEXT: fmv.w.x fa5, zero ; RV32ID-NEXT: lui a0, %hi(.LCPI34_0) -; RV32ID-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; RV32ID-NEXT: fmv.w.x fa4, zero -; RV32ID-NEXT: fmax.s fa4, fa0, fa4 -; RV32ID-NEXT: fmin.s fa5, fa4, fa5 +; RV32ID-NEXT: fmax.s fa5, fa0, fa5 +; RV32ID-NEXT: flw fa4, %lo(.LCPI34_0)(a0) +; RV32ID-NEXT: fmin.s fa5, fa5, fa4 ; RV32ID-NEXT: fcvt.wu.s a0, fa5, rtz ; RV32ID-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32ID-NEXT: addi sp, sp, 16 @@ -7032,11 +7032,11 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind { ; RV64ID-NEXT: addi sp, sp, -16 ; RV64ID-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; RV64ID-NEXT: call __extendhfsf2 +; RV64ID-NEXT: fmv.w.x fa5, zero ; RV64ID-NEXT: lui a0, %hi(.LCPI34_0) -; RV64ID-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; RV64ID-NEXT: fmv.w.x fa4, zero -; RV64ID-NEXT: fmax.s fa4, fa0, fa4 -; RV64ID-NEXT: fmin.s fa5, fa4, fa5 +; RV64ID-NEXT: fmax.s fa5, fa0, fa5 +; RV64ID-NEXT: flw fa4, %lo(.LCPI34_0)(a0) +; RV64ID-NEXT: fmin.s fa5, fa5, fa4 ; RV64ID-NEXT: fcvt.lu.s a0, fa5, rtz ; RV64ID-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64ID-NEXT: addi sp, sp, 16 @@ -7044,23 +7044,23 @@ define zeroext i16 @fcvt_wu_s_sat_i16(half %a) nounwind { ; ; CHECK32-IZFHMIN-LABEL: fcvt_wu_s_sat_i16: ; CHECK32-IZFHMIN: # %bb.0: # %start +; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; CHECK32-IZFHMIN-NEXT: fmv.w.x fa4, zero ; CHECK32-IZFHMIN-NEXT: lui a0, %hi(.LCPI34_0) -; CHECK32-IZFHMIN-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; CHECK32-IZFHMIN-NEXT: fcvt.s.h fa4, fa0 -; CHECK32-IZFHMIN-NEXT: fmv.w.x fa3, zero -; CHECK32-IZFHMIN-NEXT: fmax.s fa4, fa4, fa3 -; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa4, fa5 +; CHECK32-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK32-IZFHMIN-NEXT: flw 
fa4, %lo(.LCPI34_0)(a0) +; CHECK32-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK32-IZFHMIN-NEXT: fcvt.wu.s a0, fa5, rtz ; CHECK32-IZFHMIN-NEXT: ret ; ; CHECK64-IZFHMIN-LABEL: fcvt_wu_s_sat_i16: ; CHECK64-IZFHMIN: # %bb.0: # %start +; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa5, fa0 +; CHECK64-IZFHMIN-NEXT: fmv.w.x fa4, zero ; CHECK64-IZFHMIN-NEXT: lui a0, %hi(.LCPI34_0) -; CHECK64-IZFHMIN-NEXT: flw fa5, %lo(.LCPI34_0)(a0) -; CHECK64-IZFHMIN-NEXT: fcvt.s.h fa4, fa0 -; CHECK64-IZFHMIN-NEXT: fmv.w.x fa3, zero -; CHECK64-IZFHMIN-NEXT: fmax.s fa4, fa4, fa3 -; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa4, fa5 +; CHECK64-IZFHMIN-NEXT: fmax.s fa5, fa5, fa4 +; CHECK64-IZFHMIN-NEXT: flw fa4, %lo(.LCPI34_0)(a0) +; CHECK64-IZFHMIN-NEXT: fmin.s fa5, fa5, fa4 ; CHECK64-IZFHMIN-NEXT: fcvt.lu.s a0, fa5, rtz ; CHECK64-IZFHMIN-NEXT: ret ; @@ -8595,16 +8595,16 @@ define void @fcvt_h_q(fp128 %x, ptr %y) nounwind { ; RV32IZFH-NEXT: addi sp, sp, -32 ; RV32IZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZFH-NEXT: lw a2, 0(a0) -; RV32IZFH-NEXT: lw a3, 4(a0) -; RV32IZFH-NEXT: lw a4, 8(a0) -; RV32IZFH-NEXT: lw a5, 12(a0) ; RV32IZFH-NEXT: mv s0, a1 +; RV32IZFH-NEXT: lw a1, 0(a0) +; RV32IZFH-NEXT: lw a2, 4(a0) +; RV32IZFH-NEXT: lw a3, 8(a0) +; RV32IZFH-NEXT: lw a0, 12(a0) +; RV32IZFH-NEXT: sw a1, 8(sp) +; RV32IZFH-NEXT: sw a2, 12(sp) +; RV32IZFH-NEXT: sw a3, 16(sp) +; RV32IZFH-NEXT: sw a0, 20(sp) ; RV32IZFH-NEXT: addi a0, sp, 8 -; RV32IZFH-NEXT: sw a2, 8(sp) -; RV32IZFH-NEXT: sw a3, 12(sp) -; RV32IZFH-NEXT: sw a4, 16(sp) -; RV32IZFH-NEXT: sw a5, 20(sp) ; RV32IZFH-NEXT: call __trunctfhf2 ; RV32IZFH-NEXT: fsh fa0, 0(s0) ; RV32IZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -8630,16 +8630,16 @@ define void @fcvt_h_q(fp128 %x, ptr %y) nounwind { ; RV32IDZFH-NEXT: addi sp, sp, -32 ; RV32IDZFH-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IDZFH-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IDZFH-NEXT: lw a2, 0(a0) -; RV32IDZFH-NEXT: lw a3, 4(a0) -; RV32IDZFH-NEXT: lw a4, 8(a0) -; RV32IDZFH-NEXT: lw a5, 12(a0) ; RV32IDZFH-NEXT: mv s0, a1 +; RV32IDZFH-NEXT: lw a1, 0(a0) +; RV32IDZFH-NEXT: lw a2, 4(a0) +; RV32IDZFH-NEXT: lw a3, 8(a0) +; RV32IDZFH-NEXT: lw a0, 12(a0) +; RV32IDZFH-NEXT: sw a1, 8(sp) +; RV32IDZFH-NEXT: sw a2, 12(sp) +; RV32IDZFH-NEXT: sw a3, 16(sp) +; RV32IDZFH-NEXT: sw a0, 20(sp) ; RV32IDZFH-NEXT: addi a0, sp, 8 -; RV32IDZFH-NEXT: sw a2, 8(sp) -; RV32IDZFH-NEXT: sw a3, 12(sp) -; RV32IDZFH-NEXT: sw a4, 16(sp) -; RV32IDZFH-NEXT: sw a5, 20(sp) ; RV32IDZFH-NEXT: call __trunctfhf2 ; RV32IDZFH-NEXT: fsh fa0, 0(s0) ; RV32IDZFH-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -8665,16 +8665,16 @@ define void @fcvt_h_q(fp128 %x, ptr %y) nounwind { ; RV32IZHINX-NEXT: addi sp, sp, -32 ; RV32IZHINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZHINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZHINX-NEXT: lw a2, 0(a0) -; RV32IZHINX-NEXT: lw a3, 4(a0) -; RV32IZHINX-NEXT: lw a4, 8(a0) -; RV32IZHINX-NEXT: lw a5, 12(a0) ; RV32IZHINX-NEXT: mv s0, a1 +; RV32IZHINX-NEXT: lw a1, 0(a0) +; RV32IZHINX-NEXT: lw a2, 4(a0) +; RV32IZHINX-NEXT: lw a3, 8(a0) +; RV32IZHINX-NEXT: lw a0, 12(a0) +; RV32IZHINX-NEXT: sw a1, 8(sp) +; RV32IZHINX-NEXT: sw a2, 12(sp) +; RV32IZHINX-NEXT: sw a3, 16(sp) +; RV32IZHINX-NEXT: sw a0, 20(sp) ; RV32IZHINX-NEXT: addi a0, sp, 8 -; RV32IZHINX-NEXT: sw a2, 8(sp) -; RV32IZHINX-NEXT: sw a3, 12(sp) -; RV32IZHINX-NEXT: sw a4, 16(sp) -; RV32IZHINX-NEXT: sw a5, 20(sp) ; RV32IZHINX-NEXT: call __trunctfhf2 ; RV32IZHINX-NEXT: sh a0, 0(s0) ; RV32IZHINX-NEXT: lw ra, 
28(sp) # 4-byte Folded Reload @@ -8700,16 +8700,16 @@ define void @fcvt_h_q(fp128 %x, ptr %y) nounwind { ; RV32IZDINXZHINX-NEXT: addi sp, sp, -32 ; RV32IZDINXZHINX-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32IZDINXZHINX-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32IZDINXZHINX-NEXT: lw a2, 0(a0) -; RV32IZDINXZHINX-NEXT: lw a3, 4(a0) -; RV32IZDINXZHINX-NEXT: lw a4, 8(a0) -; RV32IZDINXZHINX-NEXT: lw a5, 12(a0) ; RV32IZDINXZHINX-NEXT: mv s0, a1 +; RV32IZDINXZHINX-NEXT: lw a1, 0(a0) +; RV32IZDINXZHINX-NEXT: lw a2, 4(a0) +; RV32IZDINXZHINX-NEXT: lw a3, 8(a0) +; RV32IZDINXZHINX-NEXT: lw a0, 12(a0) +; RV32IZDINXZHINX-NEXT: sw a1, 8(sp) +; RV32IZDINXZHINX-NEXT: sw a2, 12(sp) +; RV32IZDINXZHINX-NEXT: sw a3, 16(sp) +; RV32IZDINXZHINX-NEXT: sw a0, 20(sp) ; RV32IZDINXZHINX-NEXT: addi a0, sp, 8 -; RV32IZDINXZHINX-NEXT: sw a2, 8(sp) -; RV32IZDINXZHINX-NEXT: sw a3, 12(sp) -; RV32IZDINXZHINX-NEXT: sw a4, 16(sp) -; RV32IZDINXZHINX-NEXT: sw a5, 20(sp) ; RV32IZDINXZHINX-NEXT: call __trunctfhf2 ; RV32IZDINXZHINX-NEXT: sh a0, 0(s0) ; RV32IZDINXZHINX-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -8735,16 +8735,16 @@ define void @fcvt_h_q(fp128 %x, ptr %y) nounwind { ; RV32I-NEXT: addi sp, sp, -32 ; RV32I-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a2, 0(a0) -; RV32I-NEXT: lw a3, 4(a0) -; RV32I-NEXT: lw a4, 8(a0) -; RV32I-NEXT: lw a5, 12(a0) ; RV32I-NEXT: mv s0, a1 +; RV32I-NEXT: lw a1, 0(a0) +; RV32I-NEXT: lw a2, 4(a0) +; RV32I-NEXT: lw a3, 8(a0) +; RV32I-NEXT: lw a0, 12(a0) +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: sw a3, 16(sp) +; RV32I-NEXT: sw a0, 20(sp) ; RV32I-NEXT: addi a0, sp, 8 -; RV32I-NEXT: sw a2, 8(sp) -; RV32I-NEXT: sw a3, 12(sp) -; RV32I-NEXT: sw a4, 16(sp) -; RV32I-NEXT: sw a5, 20(sp) ; RV32I-NEXT: call __trunctfhf2 ; RV32I-NEXT: sh a0, 0(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -8770,16 +8770,16 @@ define void @fcvt_h_q(fp128 %x, ptr %y) nounwind { ; RV32ID-ILP32-NEXT: addi sp, sp, -32 ; RV32ID-ILP32-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32ID-ILP32-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32ID-ILP32-NEXT: lw a2, 0(a0) -; RV32ID-ILP32-NEXT: lw a3, 4(a0) -; RV32ID-ILP32-NEXT: lw a4, 8(a0) -; RV32ID-ILP32-NEXT: lw a5, 12(a0) ; RV32ID-ILP32-NEXT: mv s0, a1 +; RV32ID-ILP32-NEXT: lw a1, 0(a0) +; RV32ID-ILP32-NEXT: lw a2, 4(a0) +; RV32ID-ILP32-NEXT: lw a3, 8(a0) +; RV32ID-ILP32-NEXT: lw a0, 12(a0) +; RV32ID-ILP32-NEXT: sw a1, 8(sp) +; RV32ID-ILP32-NEXT: sw a2, 12(sp) +; RV32ID-ILP32-NEXT: sw a3, 16(sp) +; RV32ID-ILP32-NEXT: sw a0, 20(sp) ; RV32ID-ILP32-NEXT: addi a0, sp, 8 -; RV32ID-ILP32-NEXT: sw a2, 8(sp) -; RV32ID-ILP32-NEXT: sw a3, 12(sp) -; RV32ID-ILP32-NEXT: sw a4, 16(sp) -; RV32ID-ILP32-NEXT: sw a5, 20(sp) ; RV32ID-ILP32-NEXT: call __trunctfhf2 ; RV32ID-ILP32-NEXT: sh a0, 0(s0) ; RV32ID-ILP32-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -8805,16 +8805,16 @@ define void @fcvt_h_q(fp128 %x, ptr %y) nounwind { ; RV32ID-NEXT: addi sp, sp, -32 ; RV32ID-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; RV32ID-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; RV32ID-NEXT: lw a2, 0(a0) -; RV32ID-NEXT: lw a3, 4(a0) -; RV32ID-NEXT: lw a4, 8(a0) -; RV32ID-NEXT: lw a5, 12(a0) ; RV32ID-NEXT: mv s0, a1 +; RV32ID-NEXT: lw a1, 0(a0) +; RV32ID-NEXT: lw a2, 4(a0) +; RV32ID-NEXT: lw a3, 8(a0) +; RV32ID-NEXT: lw a0, 12(a0) +; RV32ID-NEXT: sw a1, 8(sp) +; RV32ID-NEXT: sw a2, 12(sp) +; RV32ID-NEXT: sw a3, 16(sp) +; RV32ID-NEXT: sw a0, 20(sp) ; RV32ID-NEXT: addi a0, sp, 8 -; RV32ID-NEXT: sw a2, 
8(sp) -; RV32ID-NEXT: sw a3, 12(sp) -; RV32ID-NEXT: sw a4, 16(sp) -; RV32ID-NEXT: sw a5, 20(sp) ; RV32ID-NEXT: call __trunctfhf2 ; RV32ID-NEXT: fmv.x.w a0, fa0 ; RV32ID-NEXT: sh a0, 0(s0) @@ -8842,16 +8842,16 @@ define void @fcvt_h_q(fp128 %x, ptr %y) nounwind { ; CHECK32-IZFHMIN-NEXT: addi sp, sp, -32 ; CHECK32-IZFHMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; CHECK32-IZFHMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; CHECK32-IZFHMIN-NEXT: lw a2, 0(a0) -; CHECK32-IZFHMIN-NEXT: lw a3, 4(a0) -; CHECK32-IZFHMIN-NEXT: lw a4, 8(a0) -; CHECK32-IZFHMIN-NEXT: lw a5, 12(a0) ; CHECK32-IZFHMIN-NEXT: mv s0, a1 +; CHECK32-IZFHMIN-NEXT: lw a1, 0(a0) +; CHECK32-IZFHMIN-NEXT: lw a2, 4(a0) +; CHECK32-IZFHMIN-NEXT: lw a3, 8(a0) +; CHECK32-IZFHMIN-NEXT: lw a0, 12(a0) +; CHECK32-IZFHMIN-NEXT: sw a1, 8(sp) +; CHECK32-IZFHMIN-NEXT: sw a2, 12(sp) +; CHECK32-IZFHMIN-NEXT: sw a3, 16(sp) +; CHECK32-IZFHMIN-NEXT: sw a0, 20(sp) ; CHECK32-IZFHMIN-NEXT: addi a0, sp, 8 -; CHECK32-IZFHMIN-NEXT: sw a2, 8(sp) -; CHECK32-IZFHMIN-NEXT: sw a3, 12(sp) -; CHECK32-IZFHMIN-NEXT: sw a4, 16(sp) -; CHECK32-IZFHMIN-NEXT: sw a5, 20(sp) ; CHECK32-IZFHMIN-NEXT: call __trunctfhf2 ; CHECK32-IZFHMIN-NEXT: fsh fa0, 0(s0) ; CHECK32-IZFHMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -8877,16 +8877,16 @@ define void @fcvt_h_q(fp128 %x, ptr %y) nounwind { ; CHECK32-IZHINXMIN-NEXT: addi sp, sp, -32 ; CHECK32-IZHINXMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; CHECK32-IZHINXMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; CHECK32-IZHINXMIN-NEXT: lw a2, 0(a0) -; CHECK32-IZHINXMIN-NEXT: lw a3, 4(a0) -; CHECK32-IZHINXMIN-NEXT: lw a4, 8(a0) -; CHECK32-IZHINXMIN-NEXT: lw a5, 12(a0) ; CHECK32-IZHINXMIN-NEXT: mv s0, a1 +; CHECK32-IZHINXMIN-NEXT: lw a1, 0(a0) +; CHECK32-IZHINXMIN-NEXT: lw a2, 4(a0) +; CHECK32-IZHINXMIN-NEXT: lw a3, 8(a0) +; CHECK32-IZHINXMIN-NEXT: lw a0, 12(a0) +; CHECK32-IZHINXMIN-NEXT: sw a1, 8(sp) +; CHECK32-IZHINXMIN-NEXT: sw a2, 12(sp) +; CHECK32-IZHINXMIN-NEXT: sw a3, 16(sp) +; CHECK32-IZHINXMIN-NEXT: sw a0, 20(sp) ; CHECK32-IZHINXMIN-NEXT: addi a0, sp, 8 -; CHECK32-IZHINXMIN-NEXT: sw a2, 8(sp) -; CHECK32-IZHINXMIN-NEXT: sw a3, 12(sp) -; CHECK32-IZHINXMIN-NEXT: sw a4, 16(sp) -; CHECK32-IZHINXMIN-NEXT: sw a5, 20(sp) ; CHECK32-IZHINXMIN-NEXT: call __trunctfhf2 ; CHECK32-IZHINXMIN-NEXT: sh a0, 0(s0) ; CHECK32-IZHINXMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -8912,16 +8912,16 @@ define void @fcvt_h_q(fp128 %x, ptr %y) nounwind { ; CHECK32-IZDINXZHINXMIN-NEXT: addi sp, sp, -32 ; CHECK32-IZDINXZHINXMIN-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; CHECK32-IZDINXZHINXMIN-NEXT: sw s0, 24(sp) # 4-byte Folded Spill -; CHECK32-IZDINXZHINXMIN-NEXT: lw a2, 0(a0) -; CHECK32-IZDINXZHINXMIN-NEXT: lw a3, 4(a0) -; CHECK32-IZDINXZHINXMIN-NEXT: lw a4, 8(a0) -; CHECK32-IZDINXZHINXMIN-NEXT: lw a5, 12(a0) ; CHECK32-IZDINXZHINXMIN-NEXT: mv s0, a1 +; CHECK32-IZDINXZHINXMIN-NEXT: lw a1, 0(a0) +; CHECK32-IZDINXZHINXMIN-NEXT: lw a2, 4(a0) +; CHECK32-IZDINXZHINXMIN-NEXT: lw a3, 8(a0) +; CHECK32-IZDINXZHINXMIN-NEXT: lw a0, 12(a0) +; CHECK32-IZDINXZHINXMIN-NEXT: sw a1, 8(sp) +; CHECK32-IZDINXZHINXMIN-NEXT: sw a2, 12(sp) +; CHECK32-IZDINXZHINXMIN-NEXT: sw a3, 16(sp) +; CHECK32-IZDINXZHINXMIN-NEXT: sw a0, 20(sp) ; CHECK32-IZDINXZHINXMIN-NEXT: addi a0, sp, 8 -; CHECK32-IZDINXZHINXMIN-NEXT: sw a2, 8(sp) -; CHECK32-IZDINXZHINXMIN-NEXT: sw a3, 12(sp) -; CHECK32-IZDINXZHINXMIN-NEXT: sw a4, 16(sp) -; CHECK32-IZDINXZHINXMIN-NEXT: sw a5, 20(sp) ; CHECK32-IZDINXZHINXMIN-NEXT: call __trunctfhf2 ; CHECK32-IZDINXZHINXMIN-NEXT: sh a0, 0(s0) ; 
CHECK32-IZDINXZHINXMIN-NEXT: lw ra, 28(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll b/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll index 12cf088e3205f..7754f5b8f9f3a 100644 --- a/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll +++ b/llvm/test/CodeGen/RISCV/half-fcmp-strict.ll @@ -222,8 +222,8 @@ define i32 @fcmp_one(half %a, half %b) nounwind strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: flt.h a2, fa1, fa0 ; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: or a0, a2, a1 ; CHECK-NEXT: feq.h zero, fa1, fa0 +; CHECK-NEXT: or a0, a2, a1 ; CHECK-NEXT: ret ; ; CHECKIZHINX-LABEL: fcmp_one: @@ -235,9 +235,8 @@ define i32 @fcmp_one(half %a, half %b) nounwind strictfp { ; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: flt.h a4, a1, a0 ; CHECKIZHINX-NEXT: fsflags a2 -; CHECKIZHINX-NEXT: or a2, a4, a3 ; CHECKIZHINX-NEXT: feq.h zero, a1, a0 -; CHECKIZHINX-NEXT: mv a0, a2 +; CHECKIZHINX-NEXT: or a0, a4, a3 ; CHECKIZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: fcmp_one: @@ -249,23 +248,23 @@ define i32 @fcmp_one(half %a, half %b) nounwind strictfp { ; CHECKIZFHMIN-NEXT: frflags a0 ; CHECKIZFHMIN-NEXT: flt.h a2, fa1, fa0 ; CHECKIZFHMIN-NEXT: fsflags a0 -; CHECKIZFHMIN-NEXT: or a0, a2, a1 ; CHECKIZFHMIN-NEXT: feq.h zero, fa1, fa0 +; CHECKIZFHMIN-NEXT: or a0, a2, a1 ; CHECKIZFHMIN-NEXT: ret ; ; CHECKIZHINXMIN-LABEL: fcmp_one: ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 -; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 -; CHECKIZHINXMIN-NEXT: frflags a0 -; CHECKIZHINXMIN-NEXT: flt.s a3, a2, a1 -; CHECKIZHINXMIN-NEXT: fsflags a0 -; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1 -; CHECKIZHINXMIN-NEXT: frflags a0 -; CHECKIZHINXMIN-NEXT: flt.s a4, a1, a2 -; CHECKIZHINXMIN-NEXT: fsflags a0 +; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0 +; CHECKIZHINXMIN-NEXT: frflags a2 +; CHECKIZHINXMIN-NEXT: flt.s a3, a0, a1 +; CHECKIZHINXMIN-NEXT: fsflags a2 +; CHECKIZHINXMIN-NEXT: feq.s zero, a0, a1 +; CHECKIZHINXMIN-NEXT: frflags a2 +; CHECKIZHINXMIN-NEXT: flt.s a4, a1, a0 +; CHECKIZHINXMIN-NEXT: fsflags a2 +; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a0 ; CHECKIZHINXMIN-NEXT: or a0, a4, a3 -; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2 ; CHECKIZHINXMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"one", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 @@ -319,9 +318,9 @@ define i32 @fcmp_ueq(half %a, half %b) nounwind strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: flt.h a2, fa1, fa0 ; CHECK-NEXT: fsflags a0 +; CHECK-NEXT: feq.h zero, fa1, fa0 ; CHECK-NEXT: or a1, a2, a1 ; CHECK-NEXT: xori a0, a1, 1 -; CHECK-NEXT: feq.h zero, fa1, fa0 ; CHECK-NEXT: ret ; ; CHECKIZHINX-LABEL: fcmp_ueq: @@ -333,10 +332,9 @@ define i32 @fcmp_ueq(half %a, half %b) nounwind strictfp { ; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: flt.h a4, a1, a0 ; CHECKIZHINX-NEXT: fsflags a2 -; CHECKIZHINX-NEXT: or a3, a4, a3 -; CHECKIZHINX-NEXT: xori a2, a3, 1 ; CHECKIZHINX-NEXT: feq.h zero, a1, a0 -; CHECKIZHINX-NEXT: mv a0, a2 +; CHECKIZHINX-NEXT: or a3, a4, a3 +; CHECKIZHINX-NEXT: xori a0, a3, 1 ; CHECKIZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: fcmp_ueq: @@ -348,25 +346,25 @@ define i32 @fcmp_ueq(half %a, half %b) nounwind strictfp { ; CHECKIZFHMIN-NEXT: frflags a0 ; CHECKIZFHMIN-NEXT: flt.h a2, fa1, fa0 ; CHECKIZFHMIN-NEXT: fsflags a0 +; CHECKIZFHMIN-NEXT: feq.h zero, fa1, fa0 ; CHECKIZFHMIN-NEXT: or a1, a2, a1 ; CHECKIZFHMIN-NEXT: xori a0, a1, 1 -; CHECKIZFHMIN-NEXT: feq.h zero, fa1, fa0 ; CHECKIZFHMIN-NEXT: ret ; ; CHECKIZHINXMIN-LABEL: fcmp_ueq: ; CHECKIZHINXMIN: # %bb.0: ; 
CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 -; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 -; CHECKIZHINXMIN-NEXT: frflags a0 -; CHECKIZHINXMIN-NEXT: flt.s a3, a2, a1 -; CHECKIZHINXMIN-NEXT: fsflags a0 -; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1 -; CHECKIZHINXMIN-NEXT: frflags a0 -; CHECKIZHINXMIN-NEXT: flt.s a4, a1, a2 -; CHECKIZHINXMIN-NEXT: fsflags a0 +; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0 +; CHECKIZHINXMIN-NEXT: frflags a2 +; CHECKIZHINXMIN-NEXT: flt.s a3, a0, a1 +; CHECKIZHINXMIN-NEXT: fsflags a2 +; CHECKIZHINXMIN-NEXT: feq.s zero, a0, a1 +; CHECKIZHINXMIN-NEXT: frflags a2 +; CHECKIZHINXMIN-NEXT: flt.s a4, a1, a0 +; CHECKIZHINXMIN-NEXT: fsflags a2 +; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a0 ; CHECKIZHINXMIN-NEXT: or a3, a4, a3 ; CHECKIZHINXMIN-NEXT: xori a0, a3, 1 -; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2 ; CHECKIZHINXMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ueq", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 @@ -379,8 +377,8 @@ define i32 @fcmp_ugt(half %a, half %b) nounwind strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: fle.h a1, fa0, fa1 ; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: feq.h zero, fa0, fa1 +; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: ret ; ; CHECKIZHINX-LABEL: fcmp_ugt: @@ -388,9 +386,8 @@ define i32 @fcmp_ugt(half %a, half %b) nounwind strictfp { ; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: fle.h a3, a0, a1 ; CHECKIZHINX-NEXT: fsflags a2 -; CHECKIZHINX-NEXT: xori a2, a3, 1 ; CHECKIZHINX-NEXT: feq.h zero, a0, a1 -; CHECKIZHINX-NEXT: mv a0, a2 +; CHECKIZHINX-NEXT: xori a0, a3, 1 ; CHECKIZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: fcmp_ugt: @@ -398,19 +395,19 @@ define i32 @fcmp_ugt(half %a, half %b) nounwind strictfp { ; CHECKIZFHMIN-NEXT: frflags a0 ; CHECKIZFHMIN-NEXT: fle.h a1, fa0, fa1 ; CHECKIZFHMIN-NEXT: fsflags a0 -; CHECKIZFHMIN-NEXT: xori a0, a1, 1 ; CHECKIZFHMIN-NEXT: feq.h zero, fa0, fa1 +; CHECKIZFHMIN-NEXT: xori a0, a1, 1 ; CHECKIZFHMIN-NEXT: ret ; ; CHECKIZHINXMIN-LABEL: fcmp_ugt: ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 -; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 -; CHECKIZHINXMIN-NEXT: frflags a0 -; CHECKIZHINXMIN-NEXT: fle.s a3, a2, a1 -; CHECKIZHINXMIN-NEXT: fsflags a0 +; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0 +; CHECKIZHINXMIN-NEXT: frflags a2 +; CHECKIZHINXMIN-NEXT: fle.s a3, a0, a1 +; CHECKIZHINXMIN-NEXT: fsflags a2 +; CHECKIZHINXMIN-NEXT: feq.s zero, a0, a1 ; CHECKIZHINXMIN-NEXT: xori a0, a3, 1 -; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1 ; CHECKIZHINXMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ugt", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 @@ -423,8 +420,8 @@ define i32 @fcmp_uge(half %a, half %b) nounwind strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: flt.h a1, fa0, fa1 ; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: feq.h zero, fa0, fa1 +; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: ret ; ; CHECKIZHINX-LABEL: fcmp_uge: @@ -432,9 +429,8 @@ define i32 @fcmp_uge(half %a, half %b) nounwind strictfp { ; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: flt.h a3, a0, a1 ; CHECKIZHINX-NEXT: fsflags a2 -; CHECKIZHINX-NEXT: xori a2, a3, 1 ; CHECKIZHINX-NEXT: feq.h zero, a0, a1 -; CHECKIZHINX-NEXT: mv a0, a2 +; CHECKIZHINX-NEXT: xori a0, a3, 1 ; CHECKIZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: fcmp_uge: @@ -442,19 +438,19 @@ define i32 @fcmp_uge(half %a, half %b) nounwind strictfp { ; CHECKIZFHMIN-NEXT: frflags a0 ; CHECKIZFHMIN-NEXT: flt.h a1, fa0, fa1 ; 
CHECKIZFHMIN-NEXT: fsflags a0 -; CHECKIZFHMIN-NEXT: xori a0, a1, 1 ; CHECKIZFHMIN-NEXT: feq.h zero, fa0, fa1 +; CHECKIZFHMIN-NEXT: xori a0, a1, 1 ; CHECKIZFHMIN-NEXT: ret ; ; CHECKIZHINXMIN-LABEL: fcmp_uge: ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 -; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 -; CHECKIZHINXMIN-NEXT: frflags a0 -; CHECKIZHINXMIN-NEXT: flt.s a3, a2, a1 -; CHECKIZHINXMIN-NEXT: fsflags a0 +; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0 +; CHECKIZHINXMIN-NEXT: frflags a2 +; CHECKIZHINXMIN-NEXT: flt.s a3, a0, a1 +; CHECKIZHINXMIN-NEXT: fsflags a2 +; CHECKIZHINXMIN-NEXT: feq.s zero, a0, a1 ; CHECKIZHINXMIN-NEXT: xori a0, a3, 1 -; CHECKIZHINXMIN-NEXT: feq.s zero, a2, a1 ; CHECKIZHINXMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"uge", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 @@ -467,8 +463,8 @@ define i32 @fcmp_ult(half %a, half %b) nounwind strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: fle.h a1, fa1, fa0 ; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: feq.h zero, fa1, fa0 +; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: ret ; ; CHECKIZHINX-LABEL: fcmp_ult: @@ -476,9 +472,8 @@ define i32 @fcmp_ult(half %a, half %b) nounwind strictfp { ; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: fle.h a3, a1, a0 ; CHECKIZHINX-NEXT: fsflags a2 -; CHECKIZHINX-NEXT: xori a2, a3, 1 ; CHECKIZHINX-NEXT: feq.h zero, a1, a0 -; CHECKIZHINX-NEXT: mv a0, a2 +; CHECKIZHINX-NEXT: xori a0, a3, 1 ; CHECKIZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: fcmp_ult: @@ -486,19 +481,19 @@ define i32 @fcmp_ult(half %a, half %b) nounwind strictfp { ; CHECKIZFHMIN-NEXT: frflags a0 ; CHECKIZFHMIN-NEXT: fle.h a1, fa1, fa0 ; CHECKIZFHMIN-NEXT: fsflags a0 -; CHECKIZFHMIN-NEXT: xori a0, a1, 1 ; CHECKIZFHMIN-NEXT: feq.h zero, fa1, fa0 +; CHECKIZFHMIN-NEXT: xori a0, a1, 1 ; CHECKIZFHMIN-NEXT: ret ; ; CHECKIZHINXMIN-LABEL: fcmp_ult: ; CHECKIZHINXMIN: # %bb.0: -; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 +; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 -; CHECKIZHINXMIN-NEXT: frflags a0 -; CHECKIZHINXMIN-NEXT: fle.s a3, a1, a2 -; CHECKIZHINXMIN-NEXT: fsflags a0 +; CHECKIZHINXMIN-NEXT: frflags a2 +; CHECKIZHINXMIN-NEXT: fle.s a3, a1, a0 +; CHECKIZHINXMIN-NEXT: fsflags a2 +; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a0 ; CHECKIZHINXMIN-NEXT: xori a0, a3, 1 -; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2 ; CHECKIZHINXMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ult", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 @@ -511,8 +506,8 @@ define i32 @fcmp_ule(half %a, half %b) nounwind strictfp { ; CHECK-NEXT: frflags a0 ; CHECK-NEXT: flt.h a1, fa1, fa0 ; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: feq.h zero, fa1, fa0 +; CHECK-NEXT: xori a0, a1, 1 ; CHECK-NEXT: ret ; ; CHECKIZHINX-LABEL: fcmp_ule: @@ -520,9 +515,8 @@ define i32 @fcmp_ule(half %a, half %b) nounwind strictfp { ; CHECKIZHINX-NEXT: frflags a2 ; CHECKIZHINX-NEXT: flt.h a3, a1, a0 ; CHECKIZHINX-NEXT: fsflags a2 -; CHECKIZHINX-NEXT: xori a2, a3, 1 ; CHECKIZHINX-NEXT: feq.h zero, a1, a0 -; CHECKIZHINX-NEXT: mv a0, a2 +; CHECKIZHINX-NEXT: xori a0, a3, 1 ; CHECKIZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: fcmp_ule: @@ -530,19 +524,19 @@ define i32 @fcmp_ule(half %a, half %b) nounwind strictfp { ; CHECKIZFHMIN-NEXT: frflags a0 ; CHECKIZFHMIN-NEXT: flt.h a1, fa1, fa0 ; CHECKIZFHMIN-NEXT: fsflags a0 -; CHECKIZFHMIN-NEXT: xori a0, a1, 1 ; CHECKIZFHMIN-NEXT: feq.h zero, fa1, fa0 +; 
CHECKIZFHMIN-NEXT: xori a0, a1, 1 ; CHECKIZFHMIN-NEXT: ret ; ; CHECKIZHINXMIN-LABEL: fcmp_ule: ; CHECKIZHINXMIN: # %bb.0: -; CHECKIZHINXMIN-NEXT: fcvt.s.h a2, a0 +; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 -; CHECKIZHINXMIN-NEXT: frflags a0 -; CHECKIZHINXMIN-NEXT: flt.s a3, a1, a2 -; CHECKIZHINXMIN-NEXT: fsflags a0 +; CHECKIZHINXMIN-NEXT: frflags a2 +; CHECKIZHINXMIN-NEXT: flt.s a3, a1, a0 +; CHECKIZHINXMIN-NEXT: fsflags a2 +; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a0 ; CHECKIZHINXMIN-NEXT: xori a0, a3, 1 -; CHECKIZHINXMIN-NEXT: feq.s zero, a1, a2 ; CHECKIZHINXMIN-NEXT: ret %1 = call i1 @llvm.experimental.constrained.fcmp.f16(half %a, half %b, metadata !"ule", metadata !"fpexcept.strict") strictfp %2 = zext i1 %1 to i32 diff --git a/llvm/test/CodeGen/RISCV/half-intrinsics.ll b/llvm/test/CodeGen/RISCV/half-intrinsics.ll index 7fcad77c7c17b..5d5f58278235c 100644 --- a/llvm/test/CodeGen/RISCV/half-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/half-intrinsics.ll @@ -3439,8 +3439,8 @@ define {half, i32} @frexp_half(half %x) nounwind { ; RV32IZFH-NEXT: fcvt.s.h fa0, fa0 ; RV32IZFH-NEXT: addi a0, sp, 8 ; RV32IZFH-NEXT: call frexpf -; RV32IZFH-NEXT: lw a0, 8(sp) ; RV32IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFH-NEXT: lw a0, 8(sp) ; RV32IZFH-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFH-NEXT: addi sp, sp, 16 ; RV32IZFH-NEXT: ret @@ -3452,8 +3452,8 @@ define {half, i32} @frexp_half(half %x) nounwind { ; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 ; RV64IZFH-NEXT: mv a0, sp ; RV64IZFH-NEXT: call frexpf -; RV64IZFH-NEXT: ld a0, 0(sp) ; RV64IZFH-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFH-NEXT: ld a0, 0(sp) ; RV64IZFH-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZFH-NEXT: addi sp, sp, 16 ; RV64IZFH-NEXT: ret @@ -3465,8 +3465,8 @@ define {half, i32} @frexp_half(half %x) nounwind { ; RV32IZHINX-NEXT: fcvt.s.h a0, a0 ; RV32IZHINX-NEXT: addi a1, sp, 8 ; RV32IZHINX-NEXT: call frexpf -; RV32IZHINX-NEXT: lw a1, 8(sp) ; RV32IZHINX-NEXT: fcvt.h.s a0, a0 +; RV32IZHINX-NEXT: lw a1, 8(sp) ; RV32IZHINX-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINX-NEXT: addi sp, sp, 16 ; RV32IZHINX-NEXT: ret @@ -3478,8 +3478,8 @@ define {half, i32} @frexp_half(half %x) nounwind { ; RV64IZHINX-NEXT: fcvt.s.h a0, a0 ; RV64IZHINX-NEXT: mv a1, sp ; RV64IZHINX-NEXT: call frexpf -; RV64IZHINX-NEXT: ld a1, 0(sp) ; RV64IZHINX-NEXT: fcvt.h.s a0, a0 +; RV64IZHINX-NEXT: ld a1, 0(sp) ; RV64IZHINX-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZHINX-NEXT: addi sp, sp, 16 ; RV64IZHINX-NEXT: ret @@ -3521,8 +3521,8 @@ define {half, i32} @frexp_half(half %x) nounwind { ; RV32IZFHMIN-NEXT: fcvt.s.h fa0, fa0 ; RV32IZFHMIN-NEXT: addi a0, sp, 8 ; RV32IZFHMIN-NEXT: call frexpf -; RV32IZFHMIN-NEXT: lw a0, 8(sp) ; RV32IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV32IZFHMIN-NEXT: lw a0, 8(sp) ; RV32IZFHMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZFHMIN-NEXT: addi sp, sp, 16 ; RV32IZFHMIN-NEXT: ret @@ -3534,8 +3534,8 @@ define {half, i32} @frexp_half(half %x) nounwind { ; RV64IZFHMIN-NEXT: fcvt.s.h fa0, fa0 ; RV64IZFHMIN-NEXT: mv a0, sp ; RV64IZFHMIN-NEXT: call frexpf -; RV64IZFHMIN-NEXT: ld a0, 0(sp) ; RV64IZFHMIN-NEXT: fcvt.h.s fa0, fa0 +; RV64IZFHMIN-NEXT: ld a0, 0(sp) ; RV64IZFHMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZFHMIN-NEXT: addi sp, sp, 16 ; RV64IZFHMIN-NEXT: ret @@ -3547,8 +3547,8 @@ define {half, i32} @frexp_half(half %x) nounwind { ; RV32IZHINXMIN-NEXT: fcvt.s.h a0, a0 ; RV32IZHINXMIN-NEXT: addi a1, sp, 8 ; RV32IZHINXMIN-NEXT: call frexpf -; RV32IZHINXMIN-NEXT: lw a1, 8(sp) ; RV32IZHINXMIN-NEXT: 
fcvt.h.s a0, a0 +; RV32IZHINXMIN-NEXT: lw a1, 8(sp) ; RV32IZHINXMIN-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32IZHINXMIN-NEXT: addi sp, sp, 16 ; RV32IZHINXMIN-NEXT: ret @@ -3560,8 +3560,8 @@ define {half, i32} @frexp_half(half %x) nounwind { ; RV64IZHINXMIN-NEXT: fcvt.s.h a0, a0 ; RV64IZHINXMIN-NEXT: mv a1, sp ; RV64IZHINXMIN-NEXT: call frexpf -; RV64IZHINXMIN-NEXT: ld a1, 0(sp) ; RV64IZHINXMIN-NEXT: fcvt.h.s a0, a0 +; RV64IZHINXMIN-NEXT: ld a1, 0(sp) ; RV64IZHINXMIN-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64IZHINXMIN-NEXT: addi sp, sp, 16 ; RV64IZHINXMIN-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/half-mem.ll b/llvm/test/CodeGen/RISCV/half-mem.ll index 9ac2a4d037f8a..a910bb9eec875 100644 --- a/llvm/test/CodeGen/RISCV/half-mem.ll +++ b/llvm/test/CodeGen/RISCV/half-mem.ll @@ -33,21 +33,21 @@ define half @flh(ptr %a) nounwind { ; ; CHECKIZFHMIN-LABEL: flh: ; CHECKIZFHMIN: # %bb.0: -; CHECKIZFHMIN-NEXT: flh fa5, 6(a0) -; CHECKIZFHMIN-NEXT: flh fa4, 0(a0) -; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5 +; CHECKIZFHMIN-NEXT: flh fa5, 0(a0) +; CHECKIZFHMIN-NEXT: flh fa4, 6(a0) ; CHECKIZFHMIN-NEXT: fcvt.s.h fa4, fa4 -; CHECKIZFHMIN-NEXT: fadd.s fa5, fa4, fa5 +; CHECKIZFHMIN-NEXT: fcvt.s.h fa5, fa5 +; CHECKIZFHMIN-NEXT: fadd.s fa5, fa5, fa4 ; CHECKIZFHMIN-NEXT: fcvt.h.s fa0, fa5 ; CHECKIZFHMIN-NEXT: ret ; ; CHECKIZHINXMIN-LABEL: flh: ; CHECKIZHINXMIN: # %bb.0: -; CHECKIZHINXMIN-NEXT: lh a1, 6(a0) -; CHECKIZHINXMIN-NEXT: lh a0, 0(a0) -; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 +; CHECKIZHINXMIN-NEXT: lh a1, 0(a0) +; CHECKIZHINXMIN-NEXT: lh a0, 6(a0) ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0 -; CHECKIZHINXMIN-NEXT: fadd.s a0, a0, a1 +; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 +; CHECKIZHINXMIN-NEXT: fadd.s a0, a1, a0 ; CHECKIZHINXMIN-NEXT: fcvt.h.s a0, a0 ; CHECKIZHINXMIN-NEXT: ret %1 = load half, ptr %a diff --git a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll index d92dcb9eac4c6..9aff2d434689f 100644 --- a/llvm/test/CodeGen/RISCV/half-select-fcmp.ll +++ b/llvm/test/CodeGen/RISCV/half-select-fcmp.ll @@ -737,12 +737,12 @@ define i32 @i32_select_fcmp_oeq(half %a, half %b, i32 %c, i32 %d) nounwind { ; ; CHECKIZHINX-LABEL: i32_select_fcmp_oeq: ; CHECKIZHINX: # %bb.0: -; CHECKIZHINX-NEXT: feq.h a1, a0, a1 -; CHECKIZHINX-NEXT: mv a0, a2 -; CHECKIZHINX-NEXT: bnez a1, .LBB16_2 +; CHECKIZHINX-NEXT: feq.h a0, a0, a1 +; CHECKIZHINX-NEXT: bnez a0, .LBB16_2 ; CHECKIZHINX-NEXT: # %bb.1: -; CHECKIZHINX-NEXT: mv a0, a3 +; CHECKIZHINX-NEXT: mv a2, a3 ; CHECKIZHINX-NEXT: .LBB16_2: +; CHECKIZHINX-NEXT: mv a0, a2 ; CHECKIZHINX-NEXT: ret ; ; CHECKIZFHMIN-LABEL: i32_select_fcmp_oeq: @@ -760,12 +760,12 @@ define i32 @i32_select_fcmp_oeq(half %a, half %b, i32 %c, i32 %d) nounwind { ; CHECKIZHINXMIN: # %bb.0: ; CHECKIZHINXMIN-NEXT: fcvt.s.h a1, a1 ; CHECKIZHINXMIN-NEXT: fcvt.s.h a0, a0 -; CHECKIZHINXMIN-NEXT: feq.s a1, a0, a1 -; CHECKIZHINXMIN-NEXT: mv a0, a2 -; CHECKIZHINXMIN-NEXT: bnez a1, .LBB16_2 +; CHECKIZHINXMIN-NEXT: feq.s a0, a0, a1 +; CHECKIZHINXMIN-NEXT: bnez a0, .LBB16_2 ; CHECKIZHINXMIN-NEXT: # %bb.1: -; CHECKIZHINXMIN-NEXT: mv a0, a3 +; CHECKIZHINXMIN-NEXT: mv a2, a3 ; CHECKIZHINXMIN-NEXT: .LBB16_2: +; CHECKIZHINXMIN-NEXT: mv a0, a2 ; CHECKIZHINXMIN-NEXT: ret %1 = fcmp oeq half %a, %b %2 = select i1 %1, i32 %c, i32 %d diff --git a/llvm/test/CodeGen/RISCV/iabs.ll b/llvm/test/CodeGen/RISCV/iabs.ll index 66cde323ce507..00fac434517c4 100644 --- a/llvm/test/CodeGen/RISCV/iabs.ll +++ b/llvm/test/CodeGen/RISCV/iabs.ll @@ -301,58 +301,58 @@ define i64 @select_abs64(i64 %x) { 
define i128 @abs128(i128 %x) { ; RV32I-LABEL: abs128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 12(a1) ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a1, 8(a1) -; RV32I-NEXT: bgez a3, .LBB8_2 +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: bgez a1, .LBB8_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: neg a5, a1 -; RV32I-NEXT: snez a6, a4 +; RV32I-NEXT: neg a5, a4 +; RV32I-NEXT: snez a6, a3 ; RV32I-NEXT: snez a7, a2 -; RV32I-NEXT: snez a1, a1 -; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: snez a4, a4 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: add a1, a3, a1 -; RV32I-NEXT: sub a4, a4, a7 -; RV32I-NEXT: sltu a3, a5, a6 -; RV32I-NEXT: neg a7, a1 -; RV32I-NEXT: sub a1, a5, a6 -; RV32I-NEXT: sub a3, a7, a3 +; RV32I-NEXT: add a1, a1, a4 +; RV32I-NEXT: sub a3, a3, a7 +; RV32I-NEXT: sltu a7, a5, a6 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sub a4, a5, a6 +; RV32I-NEXT: sub a1, a1, a7 ; RV32I-NEXT: neg a2, a2 ; RV32I-NEXT: .LBB8_2: ; RV32I-NEXT: sw a2, 0(a0) -; RV32I-NEXT: sw a4, 4(a0) -; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: abs128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 12(a1) ; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a4, 4(a1) -; RV32ZBB-NEXT: lw a1, 8(a1) -; RV32ZBB-NEXT: bgez a3, .LBB8_2 +; RV32ZBB-NEXT: lw a3, 4(a1) +; RV32ZBB-NEXT: lw a4, 8(a1) +; RV32ZBB-NEXT: lw a1, 12(a1) +; RV32ZBB-NEXT: bgez a1, .LBB8_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: neg a5, a1 -; RV32ZBB-NEXT: snez a6, a4 +; RV32ZBB-NEXT: neg a5, a4 +; RV32ZBB-NEXT: snez a6, a3 ; RV32ZBB-NEXT: snez a7, a2 -; RV32ZBB-NEXT: snez a1, a1 -; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: snez a4, a4 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: or a6, a7, a6 -; RV32ZBB-NEXT: add a1, a3, a1 -; RV32ZBB-NEXT: sub a4, a4, a7 -; RV32ZBB-NEXT: sltu a3, a5, a6 -; RV32ZBB-NEXT: neg a7, a1 -; RV32ZBB-NEXT: sub a1, a5, a6 -; RV32ZBB-NEXT: sub a3, a7, a3 +; RV32ZBB-NEXT: add a1, a1, a4 +; RV32ZBB-NEXT: sub a3, a3, a7 +; RV32ZBB-NEXT: sltu a7, a5, a6 +; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: sub a4, a5, a6 +; RV32ZBB-NEXT: sub a1, a1, a7 ; RV32ZBB-NEXT: neg a2, a2 ; RV32ZBB-NEXT: .LBB8_2: ; RV32ZBB-NEXT: sw a2, 0(a0) -; RV32ZBB-NEXT: sw a4, 4(a0) -; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a3, 4(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a1, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: abs128: @@ -383,58 +383,58 @@ define i128 @abs128(i128 %x) { define i128 @select_abs128(i128 %x) { ; RV32I-LABEL: select_abs128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a3, 12(a1) ; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a1, 8(a1) -; RV32I-NEXT: bgez a3, .LBB9_2 +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a4, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: bgez a1, .LBB9_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: neg a5, a1 -; RV32I-NEXT: snez a6, a4 +; RV32I-NEXT: neg a5, a4 +; RV32I-NEXT: snez a6, a3 ; RV32I-NEXT: snez a7, a2 -; RV32I-NEXT: snez a1, a1 -; RV32I-NEXT: neg a4, a4 +; RV32I-NEXT: snez a4, a4 +; RV32I-NEXT: neg a3, a3 ; RV32I-NEXT: or a6, a7, a6 -; RV32I-NEXT: add a1, a3, a1 -; RV32I-NEXT: sub a4, a4, a7 -; RV32I-NEXT: sltu a3, a5, a6 -; RV32I-NEXT: neg a7, a1 -; RV32I-NEXT: sub a1, a5, a6 -; RV32I-NEXT: sub a3, a7, a3 +; RV32I-NEXT: add a1, a1, a4 +; RV32I-NEXT: sub a3, a3, a7 +; RV32I-NEXT: sltu a7, a5, a6 +; RV32I-NEXT: neg a1, a1 +; 
RV32I-NEXT: sub a4, a5, a6 +; RV32I-NEXT: sub a1, a1, a7 ; RV32I-NEXT: neg a2, a2 ; RV32I-NEXT: .LBB9_2: ; RV32I-NEXT: sw a2, 0(a0) -; RV32I-NEXT: sw a4, 4(a0) -; RV32I-NEXT: sw a1, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a4, 8(a0) +; RV32I-NEXT: sw a1, 12(a0) ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: select_abs128: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a3, 12(a1) ; RV32ZBB-NEXT: lw a2, 0(a1) -; RV32ZBB-NEXT: lw a4, 4(a1) -; RV32ZBB-NEXT: lw a1, 8(a1) -; RV32ZBB-NEXT: bgez a3, .LBB9_2 +; RV32ZBB-NEXT: lw a3, 4(a1) +; RV32ZBB-NEXT: lw a4, 8(a1) +; RV32ZBB-NEXT: lw a1, 12(a1) +; RV32ZBB-NEXT: bgez a1, .LBB9_2 ; RV32ZBB-NEXT: # %bb.1: -; RV32ZBB-NEXT: neg a5, a1 -; RV32ZBB-NEXT: snez a6, a4 +; RV32ZBB-NEXT: neg a5, a4 +; RV32ZBB-NEXT: snez a6, a3 ; RV32ZBB-NEXT: snez a7, a2 -; RV32ZBB-NEXT: snez a1, a1 -; RV32ZBB-NEXT: neg a4, a4 +; RV32ZBB-NEXT: snez a4, a4 +; RV32ZBB-NEXT: neg a3, a3 ; RV32ZBB-NEXT: or a6, a7, a6 -; RV32ZBB-NEXT: add a1, a3, a1 -; RV32ZBB-NEXT: sub a4, a4, a7 -; RV32ZBB-NEXT: sltu a3, a5, a6 -; RV32ZBB-NEXT: neg a7, a1 -; RV32ZBB-NEXT: sub a1, a5, a6 -; RV32ZBB-NEXT: sub a3, a7, a3 +; RV32ZBB-NEXT: add a1, a1, a4 +; RV32ZBB-NEXT: sub a3, a3, a7 +; RV32ZBB-NEXT: sltu a7, a5, a6 +; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: sub a4, a5, a6 +; RV32ZBB-NEXT: sub a1, a1, a7 ; RV32ZBB-NEXT: neg a2, a2 ; RV32ZBB-NEXT: .LBB9_2: ; RV32ZBB-NEXT: sw a2, 0(a0) -; RV32ZBB-NEXT: sw a4, 4(a0) -; RV32ZBB-NEXT: sw a1, 8(a0) -; RV32ZBB-NEXT: sw a3, 12(a0) +; RV32ZBB-NEXT: sw a3, 4(a0) +; RV32ZBB-NEXT: sw a4, 8(a0) +; RV32ZBB-NEXT: sw a1, 12(a0) ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: select_abs128: diff --git a/llvm/test/CodeGen/RISCV/inline-asm-d-constraint-f.ll b/llvm/test/CodeGen/RISCV/inline-asm-d-constraint-f.ll index d58e6fe7675da..bbc4c3735de45 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-d-constraint-f.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-d-constraint-f.ll @@ -26,11 +26,11 @@ define double @constraint_f_double(double %a) nounwind { ; ; RV64F-LABEL: constraint_f_double: ; RV64F: # %bb.0: -; RV64F-NEXT: lui a1, %hi(gd) -; RV64F-NEXT: fld fa5, %lo(gd)(a1) -; RV64F-NEXT: fmv.d.x fa4, a0 +; RV64F-NEXT: fmv.d.x fa5, a0 +; RV64F-NEXT: lui a0, %hi(gd) +; RV64F-NEXT: fld fa4, %lo(gd)(a0) ; RV64F-NEXT: #APP -; RV64F-NEXT: fadd.d fa5, fa4, fa5 +; RV64F-NEXT: fadd.d fa5, fa5, fa4 ; RV64F-NEXT: #NO_APP ; RV64F-NEXT: fmv.x.d a0, fa5 ; RV64F-NEXT: ret @@ -59,11 +59,11 @@ define double @constraint_cf_double(double %a) nounwind { ; ; RV64F-LABEL: constraint_cf_double: ; RV64F: # %bb.0: -; RV64F-NEXT: lui a1, %hi(gd) -; RV64F-NEXT: fld fa5, %lo(gd)(a1) -; RV64F-NEXT: fmv.d.x fa4, a0 +; RV64F-NEXT: fmv.d.x fa5, a0 +; RV64F-NEXT: lui a0, %hi(gd) +; RV64F-NEXT: fld fa4, %lo(gd)(a0) ; RV64F-NEXT: #APP -; RV64F-NEXT: fadd.d fa5, fa4, fa5 +; RV64F-NEXT: fadd.d fa5, fa5, fa4 ; RV64F-NEXT: #NO_APP ; RV64F-NEXT: fmv.x.d a0, fa5 ; RV64F-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/inline-asm-d-modifier-N.ll b/llvm/test/CodeGen/RISCV/inline-asm-d-modifier-N.ll index 238a0fa0b6fd7..144ddb99e5c4c 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-d-modifier-N.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-d-modifier-N.ll @@ -29,11 +29,11 @@ define double @constraint_f_double(double %a) nounwind { ; ; RV64F-LABEL: constraint_f_double: ; RV64F: # %bb.0: -; RV64F-NEXT: lui a1, %hi(gd) -; RV64F-NEXT: fld fa5, %lo(gd)(a1) -; RV64F-NEXT: fmv.d.x fa4, a0 +; RV64F-NEXT: fmv.d.x fa5, a0 +; RV64F-NEXT: lui a0, %hi(gd) +; RV64F-NEXT: fld fa4, %lo(gd)(a0) ; RV64F-NEXT: #APP -; 
RV64F-NEXT: .insn 0x4, 0x02000053 | (15 << 7) | (14 << 15) | (15 << 20) +; RV64F-NEXT: .insn 0x4, 0x02000053 | (15 << 7) | (15 << 15) | (14 << 20) ; RV64F-NEXT: #NO_APP ; RV64F-NEXT: fmv.x.d a0, fa5 ; RV64F-NEXT: ret @@ -62,11 +62,11 @@ define double @constraint_cf_double(double %a) nounwind { ; ; RV64F-LABEL: constraint_cf_double: ; RV64F: # %bb.0: -; RV64F-NEXT: lui a1, %hi(gd) -; RV64F-NEXT: fld fa5, %lo(gd)(a1) -; RV64F-NEXT: fmv.d.x fa4, a0 +; RV64F-NEXT: fmv.d.x fa5, a0 +; RV64F-NEXT: lui a0, %hi(gd) +; RV64F-NEXT: fld fa4, %lo(gd)(a0) ; RV64F-NEXT: #APP -; RV64F-NEXT: .insn 0x4, 0x02000053 | (15 << 7) | (14 << 15) | (15 << 20) +; RV64F-NEXT: .insn 0x4, 0x02000053 | (15 << 7) | (15 << 15) | (14 << 20) ; RV64F-NEXT: #NO_APP ; RV64F-NEXT: fmv.x.d a0, fa5 ; RV64F-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/inline-asm-f-constraint-f.ll b/llvm/test/CodeGen/RISCV/inline-asm-f-constraint-f.ll index f17f5ba15c605..8ed247d1398ad 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-f-constraint-f.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-f-constraint-f.ll @@ -13,22 +13,22 @@ define float @constraint_f_float(float %a) nounwind { ; RV32F-LABEL: constraint_f_float: ; RV32F: # %bb.0: -; RV32F-NEXT: lui a1, %hi(gf) -; RV32F-NEXT: flw fa5, %lo(gf)(a1) -; RV32F-NEXT: fmv.w.x fa4, a0 +; RV32F-NEXT: fmv.w.x fa5, a0 +; RV32F-NEXT: lui a0, %hi(gf) +; RV32F-NEXT: flw fa4, %lo(gf)(a0) ; RV32F-NEXT: #APP -; RV32F-NEXT: fadd.s fa5, fa4, fa5 +; RV32F-NEXT: fadd.s fa5, fa5, fa4 ; RV32F-NEXT: #NO_APP ; RV32F-NEXT: fmv.x.w a0, fa5 ; RV32F-NEXT: ret ; ; RV64F-LABEL: constraint_f_float: ; RV64F: # %bb.0: -; RV64F-NEXT: lui a1, %hi(gf) -; RV64F-NEXT: flw fa5, %lo(gf)(a1) -; RV64F-NEXT: fmv.w.x fa4, a0 +; RV64F-NEXT: fmv.w.x fa5, a0 +; RV64F-NEXT: lui a0, %hi(gf) +; RV64F-NEXT: flw fa4, %lo(gf)(a0) ; RV64F-NEXT: #APP -; RV64F-NEXT: fadd.s fa5, fa4, fa5 +; RV64F-NEXT: fadd.s fa5, fa5, fa4 ; RV64F-NEXT: #NO_APP ; RV64F-NEXT: fmv.x.w a0, fa5 ; RV64F-NEXT: ret @@ -40,22 +40,22 @@ define float @constraint_f_float(float %a) nounwind { define float @constraint_cf_float(float %a) nounwind { ; RV32F-LABEL: constraint_cf_float: ; RV32F: # %bb.0: -; RV32F-NEXT: lui a1, %hi(gf) -; RV32F-NEXT: flw fa5, %lo(gf)(a1) -; RV32F-NEXT: fmv.w.x fa4, a0 +; RV32F-NEXT: fmv.w.x fa5, a0 +; RV32F-NEXT: lui a0, %hi(gf) +; RV32F-NEXT: flw fa4, %lo(gf)(a0) ; RV32F-NEXT: #APP -; RV32F-NEXT: fadd.s fa5, fa4, fa5 +; RV32F-NEXT: fadd.s fa5, fa5, fa4 ; RV32F-NEXT: #NO_APP ; RV32F-NEXT: fmv.x.w a0, fa5 ; RV32F-NEXT: ret ; ; RV64F-LABEL: constraint_cf_float: ; RV64F: # %bb.0: -; RV64F-NEXT: lui a1, %hi(gf) -; RV64F-NEXT: flw fa5, %lo(gf)(a1) -; RV64F-NEXT: fmv.w.x fa4, a0 +; RV64F-NEXT: fmv.w.x fa5, a0 +; RV64F-NEXT: lui a0, %hi(gf) +; RV64F-NEXT: flw fa4, %lo(gf)(a0) ; RV64F-NEXT: #APP -; RV64F-NEXT: fadd.s fa5, fa4, fa5 +; RV64F-NEXT: fadd.s fa5, fa5, fa4 ; RV64F-NEXT: #NO_APP ; RV64F-NEXT: fmv.x.w a0, fa5 ; RV64F-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/inline-asm-f-modifier-N.ll b/llvm/test/CodeGen/RISCV/inline-asm-f-modifier-N.ll index a0de5c71a7df6..10ed6367a49c2 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-f-modifier-N.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-f-modifier-N.ll @@ -16,22 +16,22 @@ define float @constraint_f_modifier_N_float(float %a) nounwind { ; RV32F-LABEL: constraint_f_modifier_N_float: ; RV32F: # %bb.0: -; RV32F-NEXT: lui a1, %hi(gf) -; RV32F-NEXT: flw fa5, %lo(gf)(a1) -; RV32F-NEXT: fmv.w.x fa4, a0 +; RV32F-NEXT: fmv.w.x fa5, a0 +; RV32F-NEXT: lui a0, %hi(gf) +; RV32F-NEXT: flw fa4, %lo(gf)(a0) ; RV32F-NEXT: #APP 
-; RV32F-NEXT: .insn 0x4, 0x53 | (15 << 7) | (14 << 15) | (15 << 20) +; RV32F-NEXT: .insn 0x4, 0x53 | (15 << 7) | (15 << 15) | (14 << 20) ; RV32F-NEXT: #NO_APP ; RV32F-NEXT: fmv.x.w a0, fa5 ; RV32F-NEXT: ret ; ; RV64F-LABEL: constraint_f_modifier_N_float: ; RV64F: # %bb.0: -; RV64F-NEXT: lui a1, %hi(gf) -; RV64F-NEXT: flw fa5, %lo(gf)(a1) -; RV64F-NEXT: fmv.w.x fa4, a0 +; RV64F-NEXT: fmv.w.x fa5, a0 +; RV64F-NEXT: lui a0, %hi(gf) +; RV64F-NEXT: flw fa4, %lo(gf)(a0) ; RV64F-NEXT: #APP -; RV64F-NEXT: .insn 0x4, 0x53 | (15 << 7) | (14 << 15) | (15 << 20) +; RV64F-NEXT: .insn 0x4, 0x53 | (15 << 7) | (15 << 15) | (14 << 20) ; RV64F-NEXT: #NO_APP ; RV64F-NEXT: fmv.x.w a0, fa5 ; RV64F-NEXT: ret @@ -44,22 +44,22 @@ define float @constraint_f_modifier_N_float(float %a) nounwind { define float @constraint_cf_modifier_N_float(float %a) nounwind { ; RV32F-LABEL: constraint_cf_modifier_N_float: ; RV32F: # %bb.0: -; RV32F-NEXT: lui a1, %hi(gf) -; RV32F-NEXT: flw fa5, %lo(gf)(a1) -; RV32F-NEXT: fmv.w.x fa4, a0 +; RV32F-NEXT: fmv.w.x fa5, a0 +; RV32F-NEXT: lui a0, %hi(gf) +; RV32F-NEXT: flw fa4, %lo(gf)(a0) ; RV32F-NEXT: #APP -; RV32F-NEXT: .insn 0x4, 0x53 | (15 << 7) | (14 << 15) | (15 << 20) +; RV32F-NEXT: .insn 0x4, 0x53 | (15 << 7) | (15 << 15) | (14 << 20) ; RV32F-NEXT: #NO_APP ; RV32F-NEXT: fmv.x.w a0, fa5 ; RV32F-NEXT: ret ; ; RV64F-LABEL: constraint_cf_modifier_N_float: ; RV64F: # %bb.0: -; RV64F-NEXT: lui a1, %hi(gf) -; RV64F-NEXT: flw fa5, %lo(gf)(a1) -; RV64F-NEXT: fmv.w.x fa4, a0 +; RV64F-NEXT: fmv.w.x fa5, a0 +; RV64F-NEXT: lui a0, %hi(gf) +; RV64F-NEXT: flw fa4, %lo(gf)(a0) ; RV64F-NEXT: #APP -; RV64F-NEXT: .insn 0x4, 0x53 | (15 << 7) | (14 << 15) | (15 << 20) +; RV64F-NEXT: .insn 0x4, 0x53 | (15 << 7) | (15 << 15) | (14 << 20) ; RV64F-NEXT: #NO_APP ; RV64F-NEXT: fmv.x.w a0, fa5 ; RV64F-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/inline-asm-zfinx-constraint-r.ll b/llvm/test/CodeGen/RISCV/inline-asm-zfinx-constraint-r.ll index 1c0de6c3f1612..4c15eaf7954d4 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-zfinx-constraint-r.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-zfinx-constraint-r.ll @@ -57,9 +57,9 @@ define float @constraint_float_abi_name(float %a) nounwind { ; RV32FINX: # %bb.0: ; RV32FINX-NEXT: addi sp, sp, -16 ; RV32FINX-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32FINX-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; RV32FINX-NEXT: lui a1, %hi(gf) ; RV32FINX-NEXT: lw s0, %lo(gf)(a1) -; RV32FINX-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; RV32FINX-NEXT: #APP ; RV32FINX-NEXT: fadd.s t0, a0, s0 ; RV32FINX-NEXT: #NO_APP @@ -72,9 +72,9 @@ define float @constraint_float_abi_name(float %a) nounwind { ; RV64FINX: # %bb.0: ; RV64FINX-NEXT: addi sp, sp, -16 ; RV64FINX-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; RV64FINX-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; RV64FINX-NEXT: lui a1, %hi(gf) ; RV64FINX-NEXT: lw s0, %lo(gf)(a1) -; RV64FINX-NEXT: # kill: def $x10_w killed $x10_w def $x10 ; RV64FINX-NEXT: #APP ; RV64FINX-NEXT: fadd.s t0, a0, s0 ; RV64FINX-NEXT: #NO_APP diff --git a/llvm/test/CodeGen/RISCV/inline-asm-zhinx-constraint-r.ll b/llvm/test/CodeGen/RISCV/inline-asm-zhinx-constraint-r.ll index 086d2a1d6f3b2..4482d68eba122 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm-zhinx-constraint-r.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm-zhinx-constraint-r.ll @@ -97,9 +97,9 @@ define half @constraint_half_abi_name(half %a) nounwind { ; RV32ZHINX: # %bb.0: ; RV32ZHINX-NEXT: addi sp, sp, -16 ; RV32ZHINX-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32ZHINX-NEXT: # kill: def 
$x10_h killed $x10_h def $x10 ; RV32ZHINX-NEXT: lui a1, %hi(gh) ; RV32ZHINX-NEXT: lh s0, %lo(gh)(a1) -; RV32ZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32ZHINX-NEXT: #APP ; RV32ZHINX-NEXT: fadd.s t0, a0, s0 ; RV32ZHINX-NEXT: #NO_APP @@ -112,9 +112,9 @@ define half @constraint_half_abi_name(half %a) nounwind { ; RV64ZHINX: # %bb.0: ; RV64ZHINX-NEXT: addi sp, sp, -16 ; RV64ZHINX-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; RV64ZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64ZHINX-NEXT: lui a1, %hi(gh) ; RV64ZHINX-NEXT: lh s0, %lo(gh)(a1) -; RV64ZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64ZHINX-NEXT: #APP ; RV64ZHINX-NEXT: fadd.s t0, a0, s0 ; RV64ZHINX-NEXT: #NO_APP @@ -127,9 +127,9 @@ define half @constraint_half_abi_name(half %a) nounwind { ; RV32DINXZHINX: # %bb.0: ; RV32DINXZHINX-NEXT: addi sp, sp, -16 ; RV32DINXZHINX-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32DINXZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32DINXZHINX-NEXT: lui a1, %hi(gh) ; RV32DINXZHINX-NEXT: lh s0, %lo(gh)(a1) -; RV32DINXZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV32DINXZHINX-NEXT: #APP ; RV32DINXZHINX-NEXT: fadd.s t0, a0, s0 ; RV32DINXZHINX-NEXT: #NO_APP @@ -142,9 +142,9 @@ define half @constraint_half_abi_name(half %a) nounwind { ; RV64DINXZHINX: # %bb.0: ; RV64DINXZHINX-NEXT: addi sp, sp, -16 ; RV64DINXZHINX-NEXT: sd s0, 8(sp) # 8-byte Folded Spill +; RV64DINXZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64DINXZHINX-NEXT: lui a1, %hi(gh) ; RV64DINXZHINX-NEXT: lh s0, %lo(gh)(a1) -; RV64DINXZHINX-NEXT: # kill: def $x10_h killed $x10_h def $x10 ; RV64DINXZHINX-NEXT: #APP ; RV64DINXZHINX-NEXT: fadd.s t0, a0, s0 ; RV64DINXZHINX-NEXT: #NO_APP diff --git a/llvm/test/CodeGen/RISCV/inline-asm.ll b/llvm/test/CodeGen/RISCV/inline-asm.ll index 79266743a1d05..7382ab4d3d1c2 100644 --- a/llvm/test/CodeGen/RISCV/inline-asm.ll +++ b/llvm/test/CodeGen/RISCV/inline-asm.ll @@ -34,21 +34,21 @@ define i32 @constraint_r(i32 %a) nounwind { define i32 @constraint_r_zero(i32 %a) nounwind { ; RV32I-LABEL: constraint_r_zero: ; RV32I: # %bb.0: -; RV32I-NEXT: lui a0, %hi(gi) -; RV32I-NEXT: lw a0, %lo(gi)(a0) -; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: li a0, 0 +; RV32I-NEXT: lui a1, %hi(gi) +; RV32I-NEXT: lw a1, %lo(gi)(a1) ; RV32I-NEXT: #APP -; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: #NO_APP ; RV32I-NEXT: ret ; ; RV64I-LABEL: constraint_r_zero: ; RV64I: # %bb.0: -; RV64I-NEXT: lui a0, %hi(gi) -; RV64I-NEXT: lw a0, %lo(gi)(a0) -; RV64I-NEXT: li a1, 0 +; RV64I-NEXT: li a0, 0 +; RV64I-NEXT: lui a1, %hi(gi) +; RV64I-NEXT: lw a1, %lo(gi)(a1) ; RV64I-NEXT: #APP -; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: add a0, a0, a1 ; RV64I-NEXT: #NO_APP ; RV64I-NEXT: ret %1 = load i32, ptr @gi diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll index 111b3e2bf82ce..391448b28c20b 100644 --- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll +++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll @@ -75,15 +75,15 @@ define i64 @ctz_nxv8i1_no_range( %a) { ; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vsetvli a3, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: vid.v v8 -; RV32-NEXT: li a2, -1 -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vl2r.v v24, (a3) # Unknown-size Folded Reload +; RV32-NEXT: li a3, -1 +; RV32-NEXT: addi a4, sp, 32 +; RV32-NEXT: vl2r.v v16, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, zero, 
e16, m2, ta, ma -; RV32-NEXT: vmsne.vi v0, v24, 0 +; RV32-NEXT: vmsne.vi v0, v16, 0 +; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vmadd.vx v8, a2, v16 +; RV32-NEXT: vmadd.vx v8, a3, v16 ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vmerge.vim v16, v16, -1, v0 diff --git a/llvm/test/CodeGen/RISCV/legalize-fneg.ll b/llvm/test/CodeGen/RISCV/legalize-fneg.ll index f60b77b92c09e..38cce2121c91d 100644 --- a/llvm/test/CodeGen/RISCV/legalize-fneg.ll +++ b/llvm/test/CodeGen/RISCV/legalize-fneg.ll @@ -30,12 +30,12 @@ entry: define void @test2(ptr %a, ptr %b) nounwind { ; RV32-LABEL: test2: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lw a2, 4(a1) -; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a1, 4(a1) ; RV32-NEXT: lui a3, 524288 -; RV32-NEXT: xor a2, a2, a3 -; RV32-NEXT: sw a1, 0(a0) -; RV32-NEXT: sw a2, 4(a0) +; RV32-NEXT: xor a1, a1, a3 +; RV32-NEXT: sw a2, 0(a0) +; RV32-NEXT: sw a1, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: test2: @@ -56,27 +56,27 @@ entry: define void @test3(ptr %a, ptr %b) nounwind { ; RV32-LABEL: test3: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lw a2, 12(a1) -; RV32-NEXT: lw a3, 0(a1) -; RV32-NEXT: lw a4, 4(a1) -; RV32-NEXT: lw a1, 8(a1) +; RV32-NEXT: lw a2, 0(a1) +; RV32-NEXT: lw a3, 4(a1) +; RV32-NEXT: lw a4, 8(a1) +; RV32-NEXT: lw a1, 12(a1) ; RV32-NEXT: lui a5, 524288 -; RV32-NEXT: xor a2, a2, a5 -; RV32-NEXT: sw a3, 0(a0) -; RV32-NEXT: sw a4, 4(a0) -; RV32-NEXT: sw a1, 8(a0) -; RV32-NEXT: sw a2, 12(a0) +; RV32-NEXT: xor a1, a1, a5 +; RV32-NEXT: sw a2, 0(a0) +; RV32-NEXT: sw a3, 4(a0) +; RV32-NEXT: sw a4, 8(a0) +; RV32-NEXT: sw a1, 12(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: test3: ; RV64: # %bb.0: # %entry -; RV64-NEXT: ld a2, 8(a1) -; RV64-NEXT: ld a1, 0(a1) +; RV64-NEXT: ld a2, 0(a1) +; RV64-NEXT: ld a1, 8(a1) ; RV64-NEXT: li a3, -1 ; RV64-NEXT: slli a3, a3, 63 -; RV64-NEXT: xor a2, a2, a3 -; RV64-NEXT: sd a1, 0(a0) -; RV64-NEXT: sd a2, 8(a0) +; RV64-NEXT: xor a1, a1, a3 +; RV64-NEXT: sd a2, 0(a0) +; RV64-NEXT: sd a1, 8(a0) ; RV64-NEXT: ret entry: %0 = load fp128, ptr %b diff --git a/llvm/test/CodeGen/RISCV/llvm.exp10.ll b/llvm/test/CodeGen/RISCV/llvm.exp10.ll index 7b199504837e8..51189ef60e852 100644 --- a/llvm/test/CodeGen/RISCV/llvm.exp10.ll +++ b/llvm/test/CodeGen/RISCV/llvm.exp10.ll @@ -143,12 +143,12 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) { ; RV32IFD-NEXT: .cfi_offset fs1, -32 ; RV32IFD-NEXT: .cfi_offset fs2, -40 ; RV32IFD-NEXT: mv s0, a0 -; RV32IFD-NEXT: lhu a0, 8(a1) -; RV32IFD-NEXT: lhu a2, 0(a1) -; RV32IFD-NEXT: lhu a1, 4(a1) -; RV32IFD-NEXT: fmv.w.x fs0, a0 -; RV32IFD-NEXT: fmv.w.x fs1, a2 -; RV32IFD-NEXT: fmv.w.x fa0, a1 +; RV32IFD-NEXT: lhu a0, 0(a1) +; RV32IFD-NEXT: lhu a2, 4(a1) +; RV32IFD-NEXT: lhu a1, 8(a1) +; RV32IFD-NEXT: fmv.w.x fs0, a1 +; RV32IFD-NEXT: fmv.w.x fs1, a0 +; RV32IFD-NEXT: fmv.w.x fa0, a2 ; RV32IFD-NEXT: call __extendhfsf2 ; RV32IFD-NEXT: call exp10f ; RV32IFD-NEXT: call __truncsfhf2 @@ -200,11 +200,11 @@ define <3 x half> @exp10_v3f16(<3 x half> %x) { ; RV64IFD-NEXT: .cfi_offset s1, -24 ; RV64IFD-NEXT: .cfi_offset s2, -32 ; RV64IFD-NEXT: .cfi_offset fs0, -40 +; RV64IFD-NEXT: mv s0, a0 ; RV64IFD-NEXT: lhu s1, 0(a1) -; RV64IFD-NEXT: lhu a2, 8(a1) +; RV64IFD-NEXT: lhu a0, 8(a1) ; RV64IFD-NEXT: lhu s2, 16(a1) -; RV64IFD-NEXT: mv s0, a0 -; RV64IFD-NEXT: fmv.w.x fa0, a2 +; RV64IFD-NEXT: fmv.w.x fa0, a0 ; RV64IFD-NEXT: call __extendhfsf2 ; RV64IFD-NEXT: call exp10f ; RV64IFD-NEXT: call __truncsfhf2 @@ -267,14 +267,14 @@ define <4 x half> 
@exp10_v4f16(<4 x half> %x) { ; RV32IFD-NEXT: .cfi_offset fs2, -48 ; RV32IFD-NEXT: .cfi_offset fs3, -56 ; RV32IFD-NEXT: mv s0, a0 -; RV32IFD-NEXT: lhu a0, 12(a1) -; RV32IFD-NEXT: lhu a2, 0(a1) -; RV32IFD-NEXT: lhu a3, 4(a1) -; RV32IFD-NEXT: lhu a1, 8(a1) -; RV32IFD-NEXT: fmv.w.x fs0, a0 -; RV32IFD-NEXT: fmv.w.x fs1, a2 -; RV32IFD-NEXT: fmv.w.x fs2, a3 -; RV32IFD-NEXT: fmv.w.x fa0, a1 +; RV32IFD-NEXT: lhu a0, 0(a1) +; RV32IFD-NEXT: lhu a2, 4(a1) +; RV32IFD-NEXT: lhu a3, 8(a1) +; RV32IFD-NEXT: lhu a1, 12(a1) +; RV32IFD-NEXT: fmv.w.x fs0, a1 +; RV32IFD-NEXT: fmv.w.x fs1, a0 +; RV32IFD-NEXT: fmv.w.x fs2, a2 +; RV32IFD-NEXT: fmv.w.x fa0, a3 ; RV32IFD-NEXT: call __extendhfsf2 ; RV32IFD-NEXT: call exp10f ; RV32IFD-NEXT: call __truncsfhf2 @@ -343,12 +343,12 @@ define <4 x half> @exp10_v4f16(<4 x half> %x) { ; RV64IFD-NEXT: .cfi_offset fs0, -48 ; RV64IFD-NEXT: .cfi_offset fs1, -56 ; RV64IFD-NEXT: .cfi_offset fs2, -64 +; RV64IFD-NEXT: mv s0, a0 ; RV64IFD-NEXT: lhu s1, 0(a1) ; RV64IFD-NEXT: lhu s2, 8(a1) -; RV64IFD-NEXT: lhu a2, 16(a1) +; RV64IFD-NEXT: lhu a0, 16(a1) ; RV64IFD-NEXT: lhu s3, 24(a1) -; RV64IFD-NEXT: mv s0, a0 -; RV64IFD-NEXT: fmv.w.x fa0, a2 +; RV64IFD-NEXT: fmv.w.x fa0, a0 ; RV64IFD-NEXT: call __extendhfsf2 ; RV64IFD-NEXT: call exp10f ; RV64IFD-NEXT: call __truncsfhf2 diff --git a/llvm/test/CodeGen/RISCV/llvm.frexp.ll b/llvm/test/CodeGen/RISCV/llvm.frexp.ll index 4a77b4d32cdda..28f56e49b6693 100644 --- a/llvm/test/CodeGen/RISCV/llvm.frexp.ll +++ b/llvm/test/CodeGen/RISCV/llvm.frexp.ll @@ -730,38 +730,37 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw s0, 4(a1) -; RV32I-NEXT: lw s1, 8(a1) -; RV32I-NEXT: lw s2, 12(a1) -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a0, 0(a1) +; RV32I-NEXT: lw s1, 4(a1) +; RV32I-NEXT: lw s2, 8(a1) +; RV32I-NEXT: lw s3, 12(a1) ; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: addi a1, sp, 12 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call frexpf -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: addi a1, sp, 16 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: addi a1, sp, 20 +; RV32I-NEXT: addi a1, sp, 16 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call frexpf +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: addi a1, sp, 20 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call frexpf ; RV32I-NEXT: lw a1, 8(sp) ; RV32I-NEXT: lw a2, 12(sp) ; RV32I-NEXT: lw a3, 16(sp) ; RV32I-NEXT: lw a4, 20(sp) -; RV32I-NEXT: sw s4, 0(s3) -; RV32I-NEXT: sw s0, 4(s3) -; RV32I-NEXT: sw s1, 8(s3) -; RV32I-NEXT: sw a0, 12(s3) -; RV32I-NEXT: sw a1, 16(s3) -; RV32I-NEXT: sw a2, 20(s3) -; RV32I-NEXT: sw a3, 24(s3) -; RV32I-NEXT: sw a4, 28(s3) +; RV32I-NEXT: sw s4, 0(s0) +; RV32I-NEXT: sw s1, 4(s0) +; RV32I-NEXT: sw s2, 8(s0) +; RV32I-NEXT: sw a0, 12(s0) +; RV32I-NEXT: sw a1, 16(s0) +; RV32I-NEXT: sw a2, 20(s0) +; RV32I-NEXT: sw a3, 24(s0) +; RV32I-NEXT: sw a4, 28(s0) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -780,38 +779,37 @@ define { <4 x float>, <4 x i32> } @test_frexp_v4f32_v4i32(<4 x float> %a) nounwi ; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill 
-; RV64I-NEXT: lw a2, 0(a1) -; RV64I-NEXT: lw s0, 8(a1) -; RV64I-NEXT: lw s1, 16(a1) -; RV64I-NEXT: lw s2, 24(a1) -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lw a0, 0(a1) +; RV64I-NEXT: lw s1, 8(a1) +; RV64I-NEXT: lw s2, 16(a1) +; RV64I-NEXT: lw s3, 24(a1) ; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: addi a1, sp, 4 -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: call frexpf -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: addi a1, sp, 8 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: addi a1, sp, 12 +; RV64I-NEXT: addi a1, sp, 8 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call frexpf +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: addi a1, sp, 12 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call frexpf ; RV64I-NEXT: lw a1, 0(sp) ; RV64I-NEXT: lw a2, 4(sp) ; RV64I-NEXT: lw a3, 8(sp) ; RV64I-NEXT: lw a4, 12(sp) -; RV64I-NEXT: sw s4, 0(s3) -; RV64I-NEXT: sw s0, 4(s3) -; RV64I-NEXT: sw s1, 8(s3) -; RV64I-NEXT: sw a0, 12(s3) -; RV64I-NEXT: sw a1, 16(s3) -; RV64I-NEXT: sw a2, 20(s3) -; RV64I-NEXT: sw a3, 24(s3) -; RV64I-NEXT: sw a4, 28(s3) +; RV64I-NEXT: sw s4, 0(s0) +; RV64I-NEXT: sw s1, 4(s0) +; RV64I-NEXT: sw s2, 8(s0) +; RV64I-NEXT: sw a0, 12(s0) +; RV64I-NEXT: sw a1, 16(s0) +; RV64I-NEXT: sw a2, 20(s0) +; RV64I-NEXT: sw a3, 24(s0) +; RV64I-NEXT: sw a4, 28(s0) ; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -998,30 +996,29 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw s0, 4(a1) -; RV32I-NEXT: lw s1, 8(a1) -; RV32I-NEXT: lw s2, 12(a1) -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a0, 0(a1) +; RV32I-NEXT: lw s1, 4(a1) +; RV32I-NEXT: lw s2, 8(a1) +; RV32I-NEXT: lw s3, 12(a1) ; RV32I-NEXT: addi a1, sp, 8 -; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: addi a1, sp, 12 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call frexpf -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: addi a1, sp, 16 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: addi a1, sp, 20 +; RV32I-NEXT: addi a1, sp, 16 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call frexpf -; RV32I-NEXT: sw s4, 0(s3) -; RV32I-NEXT: sw s0, 4(s3) -; RV32I-NEXT: sw s1, 8(s3) -; RV32I-NEXT: sw a0, 12(s3) +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: addi a1, sp, 20 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call frexpf +; RV32I-NEXT: sw s4, 0(s0) +; RV32I-NEXT: sw s1, 4(s0) +; RV32I-NEXT: sw s2, 8(s0) +; RV32I-NEXT: sw a0, 12(s0) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1040,30 +1037,29 @@ define <4 x float> @test_frexp_v4f32_v4i32_only_use_fract(<4 x float> %a) nounwi ; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a2, 0(a1) -; RV64I-NEXT: lw s0, 8(a1) -; RV64I-NEXT: lw s1, 16(a1) -; RV64I-NEXT: lw s2, 24(a1) -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lw a0, 0(a1) +; RV64I-NEXT: lw s1, 8(a1) +; RV64I-NEXT: lw s2, 16(a1) +; RV64I-NEXT: lw s3, 24(a1) ; RV64I-NEXT: mv a1, sp -; RV64I-NEXT: mv a0, a2 ; 
RV64I-NEXT: call frexpf ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: addi a1, sp, 4 -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: call frexpf -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: addi a1, sp, 8 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: addi a1, sp, 12 +; RV64I-NEXT: addi a1, sp, 8 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call frexpf -; RV64I-NEXT: sw s4, 0(s3) -; RV64I-NEXT: sw s0, 4(s3) -; RV64I-NEXT: sw s1, 8(s3) -; RV64I-NEXT: sw a0, 12(s3) +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: addi a1, sp, 12 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call frexpf +; RV64I-NEXT: sw s4, 0(s0) +; RV64I-NEXT: sw s1, 4(s0) +; RV64I-NEXT: sw s2, 8(s0) +; RV64I-NEXT: sw a0, 12(s0) ; RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -1230,31 +1226,30 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; RV32I-NEXT: sw s1, 36(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 32(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a2, 0(a1) -; RV32I-NEXT: lw s0, 4(a1) -; RV32I-NEXT: lw s1, 8(a1) -; RV32I-NEXT: lw s2, 12(a1) -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a0, 0(a1) +; RV32I-NEXT: lw s1, 4(a1) +; RV32I-NEXT: lw s2, 8(a1) +; RV32I-NEXT: lw s3, 12(a1) ; RV32I-NEXT: addi a1, sp, 12 -; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: addi a1, sp, 16 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: addi a1, sp, 20 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: addi a1, sp, 24 -; RV32I-NEXT: mv a0, s2 +; RV32I-NEXT: mv a0, s3 ; RV32I-NEXT: call frexpf ; RV32I-NEXT: lw a0, 12(sp) ; RV32I-NEXT: lw a1, 16(sp) ; RV32I-NEXT: lw a2, 20(sp) ; RV32I-NEXT: lw a3, 24(sp) -; RV32I-NEXT: sw a0, 0(s3) -; RV32I-NEXT: sw a1, 4(s3) -; RV32I-NEXT: sw a2, 8(s3) -; RV32I-NEXT: sw a3, 12(s3) +; RV32I-NEXT: sw a0, 0(s0) +; RV32I-NEXT: sw a1, 4(s0) +; RV32I-NEXT: sw a2, 8(s0) +; RV32I-NEXT: sw a3, 12(s0) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 40(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 36(sp) # 4-byte Folded Reload @@ -1271,31 +1266,30 @@ define <4 x i32> @test_frexp_v4f32_v4i32_only_use_exp(<4 x float> %a) nounwind { ; RV64I-NEXT: sd s1, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 32(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a2, 0(a1) -; RV64I-NEXT: lw s0, 8(a1) -; RV64I-NEXT: lw s1, 16(a1) -; RV64I-NEXT: lw s2, 24(a1) -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lw a0, 0(a1) +; RV64I-NEXT: lw s1, 8(a1) +; RV64I-NEXT: lw s2, 16(a1) +; RV64I-NEXT: lw s3, 24(a1) ; RV64I-NEXT: addi a1, sp, 8 -; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: addi a1, sp, 12 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: addi a1, sp, 16 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: addi a1, sp, 20 -; RV64I-NEXT: mv a0, s2 +; RV64I-NEXT: mv a0, s3 ; RV64I-NEXT: call frexpf ; RV64I-NEXT: lw a0, 8(sp) ; RV64I-NEXT: lw a1, 12(sp) ; RV64I-NEXT: lw a2, 16(sp) ; RV64I-NEXT: lw a3, 20(sp) -; RV64I-NEXT: sw a0, 0(s3) -; RV64I-NEXT: sw a1, 4(s3) -; RV64I-NEXT: sw a2, 8(s3) -; RV64I-NEXT: sw a3, 12(s3) +; RV64I-NEXT: sw a0, 0(s0) +; RV64I-NEXT: sw a1, 4(s0) +; RV64I-NEXT: sw a2, 8(s0) +; RV64I-NEXT: sw a3, 12(s0) ; 
RV64I-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 48(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 40(sp) # 8-byte Folded Reload @@ -1547,18 +1541,18 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind { ; RV32IFD-NEXT: addi sp, sp, -48 ; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: lw a3, 0(a1) -; RV32IFD-NEXT: lw a4, 4(a1) -; RV32IFD-NEXT: lw a5, 8(a1) -; RV32IFD-NEXT: lw a6, 12(a1) ; RV32IFD-NEXT: mv s0, a0 +; RV32IFD-NEXT: lw a0, 0(a1) +; RV32IFD-NEXT: lw a2, 4(a1) +; RV32IFD-NEXT: lw a3, 8(a1) +; RV32IFD-NEXT: lw a1, 12(a1) +; RV32IFD-NEXT: sw a0, 0(sp) +; RV32IFD-NEXT: sw a2, 4(sp) +; RV32IFD-NEXT: sw a3, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: addi a0, sp, 16 ; RV32IFD-NEXT: mv a1, sp ; RV32IFD-NEXT: addi a2, sp, 36 -; RV32IFD-NEXT: sw a3, 0(sp) -; RV32IFD-NEXT: sw a4, 4(sp) -; RV32IFD-NEXT: sw a5, 8(sp) -; RV32IFD-NEXT: sw a6, 12(sp) ; RV32IFD-NEXT: call frexpl ; RV32IFD-NEXT: lw a0, 36(sp) ; RV32IFD-NEXT: lw a1, 16(sp) @@ -1600,18 +1594,18 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -48 ; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: lw a3, 0(a1) -; RV32IZFINXZDINX-NEXT: lw a4, 4(a1) -; RV32IZFINXZDINX-NEXT: lw a5, 8(a1) -; RV32IZFINXZDINX-NEXT: lw a6, 12(a1) ; RV32IZFINXZDINX-NEXT: mv s0, a0 +; RV32IZFINXZDINX-NEXT: lw a0, 0(a1) +; RV32IZFINXZDINX-NEXT: lw a2, 4(a1) +; RV32IZFINXZDINX-NEXT: lw a3, 8(a1) +; RV32IZFINXZDINX-NEXT: lw a1, 12(a1) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a2, 4(sp) +; RV32IZFINXZDINX-NEXT: sw a3, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: addi a0, sp, 16 ; RV32IZFINXZDINX-NEXT: mv a1, sp ; RV32IZFINXZDINX-NEXT: addi a2, sp, 36 -; RV32IZFINXZDINX-NEXT: sw a3, 0(sp) -; RV32IZFINXZDINX-NEXT: sw a4, 4(sp) -; RV32IZFINXZDINX-NEXT: sw a5, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a6, 12(sp) ; RV32IZFINXZDINX-NEXT: call frexpl ; RV32IZFINXZDINX-NEXT: lw a0, 36(sp) ; RV32IZFINXZDINX-NEXT: lw a1, 16(sp) @@ -1653,18 +1647,18 @@ define { fp128, i32 } @test_frexp_f128_i32(fp128 %a) nounwind { ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a0, 0(a1) +; RV32I-NEXT: lw a2, 4(a1) +; RV32I-NEXT: lw a3, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) +; RV32I-NEXT: sw a3, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: addi a2, sp, 36 -; RV32I-NEXT: sw a3, 0(sp) -; RV32I-NEXT: sw a4, 4(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: call frexpl ; RV32I-NEXT: lw a0, 36(sp) ; RV32I-NEXT: lw a1, 16(sp) @@ -1710,18 +1704,18 @@ define fp128 @test_frexp_f128_i32_only_use_fract(fp128 %a) nounwind { ; RV32IFD-NEXT: addi sp, sp, -48 ; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32IFD-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: lw a3, 0(a1) -; RV32IFD-NEXT: lw a4, 4(a1) -; RV32IFD-NEXT: lw a5, 8(a1) -; RV32IFD-NEXT: lw a6, 12(a1) ; RV32IFD-NEXT: mv s0, a0 +; RV32IFD-NEXT: lw a0, 0(a1) +; RV32IFD-NEXT: lw a2, 4(a1) +; RV32IFD-NEXT: lw a3, 8(a1) +; RV32IFD-NEXT: lw a1, 12(a1) +; 
RV32IFD-NEXT: sw a0, 0(sp) +; RV32IFD-NEXT: sw a2, 4(sp) +; RV32IFD-NEXT: sw a3, 8(sp) +; RV32IFD-NEXT: sw a1, 12(sp) ; RV32IFD-NEXT: addi a0, sp, 16 ; RV32IFD-NEXT: mv a1, sp ; RV32IFD-NEXT: addi a2, sp, 36 -; RV32IFD-NEXT: sw a3, 0(sp) -; RV32IFD-NEXT: sw a4, 4(sp) -; RV32IFD-NEXT: sw a5, 8(sp) -; RV32IFD-NEXT: sw a6, 12(sp) ; RV32IFD-NEXT: call frexpl ; RV32IFD-NEXT: lw a0, 16(sp) ; RV32IFD-NEXT: lw a1, 20(sp) @@ -1751,18 +1745,18 @@ define fp128 @test_frexp_f128_i32_only_use_fract(fp128 %a) nounwind { ; RV32IZFINXZDINX-NEXT: addi sp, sp, -48 ; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32IZFINXZDINX-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: lw a3, 0(a1) -; RV32IZFINXZDINX-NEXT: lw a4, 4(a1) -; RV32IZFINXZDINX-NEXT: lw a5, 8(a1) -; RV32IZFINXZDINX-NEXT: lw a6, 12(a1) ; RV32IZFINXZDINX-NEXT: mv s0, a0 +; RV32IZFINXZDINX-NEXT: lw a0, 0(a1) +; RV32IZFINXZDINX-NEXT: lw a2, 4(a1) +; RV32IZFINXZDINX-NEXT: lw a3, 8(a1) +; RV32IZFINXZDINX-NEXT: lw a1, 12(a1) +; RV32IZFINXZDINX-NEXT: sw a0, 0(sp) +; RV32IZFINXZDINX-NEXT: sw a2, 4(sp) +; RV32IZFINXZDINX-NEXT: sw a3, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a1, 12(sp) ; RV32IZFINXZDINX-NEXT: addi a0, sp, 16 ; RV32IZFINXZDINX-NEXT: mv a1, sp ; RV32IZFINXZDINX-NEXT: addi a2, sp, 36 -; RV32IZFINXZDINX-NEXT: sw a3, 0(sp) -; RV32IZFINXZDINX-NEXT: sw a4, 4(sp) -; RV32IZFINXZDINX-NEXT: sw a5, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a6, 12(sp) ; RV32IZFINXZDINX-NEXT: call frexpl ; RV32IZFINXZDINX-NEXT: lw a0, 16(sp) ; RV32IZFINXZDINX-NEXT: lw a1, 20(sp) @@ -1792,18 +1786,18 @@ define fp128 @test_frexp_f128_i32_only_use_fract(fp128 %a) nounwind { ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s0, 40(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: lw a6, 12(a1) ; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lw a0, 0(a1) +; RV32I-NEXT: lw a2, 4(a1) +; RV32I-NEXT: lw a3, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: sw a0, 0(sp) +; RV32I-NEXT: sw a2, 4(sp) +; RV32I-NEXT: sw a3, 8(sp) +; RV32I-NEXT: sw a1, 12(sp) ; RV32I-NEXT: addi a0, sp, 16 ; RV32I-NEXT: mv a1, sp ; RV32I-NEXT: addi a2, sp, 36 -; RV32I-NEXT: sw a3, 0(sp) -; RV32I-NEXT: sw a4, 4(sp) -; RV32I-NEXT: sw a5, 8(sp) -; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: call frexpl ; RV32I-NEXT: lw a0, 16(sp) ; RV32I-NEXT: lw a1, 20(sp) @@ -1837,17 +1831,17 @@ define i32 @test_frexp_f128_i32_only_use_exp(fp128 %a) nounwind { ; RV32IFD: # %bb.0: ; RV32IFD-NEXT: addi sp, sp, -48 ; RV32IFD-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32IFD-NEXT: lw a3, 0(a0) -; RV32IFD-NEXT: lw a4, 4(a0) -; RV32IFD-NEXT: lw a5, 8(a0) -; RV32IFD-NEXT: lw a6, 12(a0) +; RV32IFD-NEXT: lw a1, 0(a0) +; RV32IFD-NEXT: lw a2, 4(a0) +; RV32IFD-NEXT: lw a3, 8(a0) +; RV32IFD-NEXT: lw a0, 12(a0) +; RV32IFD-NEXT: sw a1, 8(sp) +; RV32IFD-NEXT: sw a2, 12(sp) +; RV32IFD-NEXT: sw a3, 16(sp) +; RV32IFD-NEXT: sw a0, 20(sp) ; RV32IFD-NEXT: addi a0, sp, 24 ; RV32IFD-NEXT: addi a1, sp, 8 ; RV32IFD-NEXT: addi a2, sp, 40 -; RV32IFD-NEXT: sw a3, 8(sp) -; RV32IFD-NEXT: sw a4, 12(sp) -; RV32IFD-NEXT: sw a5, 16(sp) -; RV32IFD-NEXT: sw a6, 20(sp) ; RV32IFD-NEXT: call frexpl ; RV32IFD-NEXT: lw a0, 40(sp) ; RV32IFD-NEXT: lw ra, 44(sp) # 4-byte Folded Reload @@ -1869,17 +1863,17 @@ define i32 @test_frexp_f128_i32_only_use_exp(fp128 %a) nounwind { ; RV32IZFINXZDINX: # %bb.0: ; RV32IZFINXZDINX-NEXT: addi sp, sp, -48 ; RV32IZFINXZDINX-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32IZFINXZDINX-NEXT: lw a3, 
0(a0) -; RV32IZFINXZDINX-NEXT: lw a4, 4(a0) -; RV32IZFINXZDINX-NEXT: lw a5, 8(a0) -; RV32IZFINXZDINX-NEXT: lw a6, 12(a0) +; RV32IZFINXZDINX-NEXT: lw a1, 0(a0) +; RV32IZFINXZDINX-NEXT: lw a2, 4(a0) +; RV32IZFINXZDINX-NEXT: lw a3, 8(a0) +; RV32IZFINXZDINX-NEXT: lw a0, 12(a0) +; RV32IZFINXZDINX-NEXT: sw a1, 8(sp) +; RV32IZFINXZDINX-NEXT: sw a2, 12(sp) +; RV32IZFINXZDINX-NEXT: sw a3, 16(sp) +; RV32IZFINXZDINX-NEXT: sw a0, 20(sp) ; RV32IZFINXZDINX-NEXT: addi a0, sp, 24 ; RV32IZFINXZDINX-NEXT: addi a1, sp, 8 ; RV32IZFINXZDINX-NEXT: addi a2, sp, 40 -; RV32IZFINXZDINX-NEXT: sw a3, 8(sp) -; RV32IZFINXZDINX-NEXT: sw a4, 12(sp) -; RV32IZFINXZDINX-NEXT: sw a5, 16(sp) -; RV32IZFINXZDINX-NEXT: sw a6, 20(sp) ; RV32IZFINXZDINX-NEXT: call frexpl ; RV32IZFINXZDINX-NEXT: lw a0, 40(sp) ; RV32IZFINXZDINX-NEXT: lw ra, 44(sp) # 4-byte Folded Reload @@ -1901,17 +1895,17 @@ define i32 @test_frexp_f128_i32_only_use_exp(fp128 %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -48 ; RV32I-NEXT: sw ra, 44(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a3, 0(a0) -; RV32I-NEXT: lw a4, 4(a0) -; RV32I-NEXT: lw a5, 8(a0) -; RV32I-NEXT: lw a6, 12(a0) +; RV32I-NEXT: lw a1, 0(a0) +; RV32I-NEXT: lw a2, 4(a0) +; RV32I-NEXT: lw a3, 8(a0) +; RV32I-NEXT: lw a0, 12(a0) +; RV32I-NEXT: sw a1, 8(sp) +; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: sw a3, 16(sp) +; RV32I-NEXT: sw a0, 20(sp) ; RV32I-NEXT: addi a0, sp, 24 ; RV32I-NEXT: addi a1, sp, 8 ; RV32I-NEXT: addi a2, sp, 40 -; RV32I-NEXT: sw a3, 8(sp) -; RV32I-NEXT: sw a4, 12(sp) -; RV32I-NEXT: sw a5, 16(sp) -; RV32I-NEXT: sw a6, 20(sp) ; RV32I-NEXT: call frexpl ; RV32I-NEXT: lw a0, 40(sp) ; RV32I-NEXT: lw ra, 44(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll index fa8ca071d2189..627f0005932a3 100644 --- a/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll +++ b/llvm/test/CodeGen/RISCV/loop-strength-reduce-add-cheaper-than-mul.ll @@ -43,8 +43,8 @@ define void @test(i32 signext %i) nounwind { ; RV32-NEXT: .LBB0_2: # %bb ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-NEXT: add a4, a2, a1 -; RV32-NEXT: add a1, a1, a0 ; RV32-NEXT: sb zero, 0(a4) +; RV32-NEXT: add a1, a1, a0 ; RV32-NEXT: blt a1, a3, .LBB0_2 ; RV32-NEXT: .LBB0_3: # %return ; RV32-NEXT: ret @@ -63,8 +63,8 @@ define void @test(i32 signext %i) nounwind { ; RV64-NEXT: slli a4, a1, 32 ; RV64-NEXT: srli a4, a4, 32 ; RV64-NEXT: add a4, a2, a4 -; RV64-NEXT: addw a1, a1, a0 ; RV64-NEXT: sb zero, 0(a4) +; RV64-NEXT: addw a1, a1, a0 ; RV64-NEXT: blt a1, a3, .LBB0_2 ; RV64-NEXT: .LBB0_3: # %return ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll index eb84774014a4b..b3777668e20bd 100644 --- a/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll +++ b/llvm/test/CodeGen/RISCV/machine-sink-load-immediate.ll @@ -319,8 +319,8 @@ define signext i32 @branch_dispatch(i8 %a) { ; CHECK-NEXT: li a1, 70 ; CHECK-NEXT: beq a0, a1, .LBB3_9 ; CHECK-NEXT: # %bb.3: # %case.3 -; CHECK-NEXT: li a1, 234 ; CHECK-NEXT: li s0, 23 +; CHECK-NEXT: li a1, 234 ; CHECK-NEXT: beq a0, a1, .LBB3_10 ; CHECK-NEXT: # %bb.4: # %case.4 ; CHECK-NEXT: beqz a0, .LBB3_11 diff --git a/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll b/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll index 8deb17582cb11..ae9572328bd5d 100644 --- a/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll +++ 
b/llvm/test/CodeGen/RISCV/machinelicm-address-pseudos.ll @@ -57,29 +57,29 @@ ret: define void @test_la(i32 signext %n) { ; RV32I-LABEL: test_la: ; RV32I: # %bb.0: # %entry +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: .Lpcrel_hi1: -; RV32I-NEXT: auipc a1, %got_pcrel_hi(g) -; RV32I-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi1)(a1) -; RV32I-NEXT: li a2, 0 +; RV32I-NEXT: auipc a2, %got_pcrel_hi(g) +; RV32I-NEXT: lw a2, %pcrel_lo(.Lpcrel_hi1)(a2) ; RV32I-NEXT: .LBB1_1: # %loop ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV32I-NEXT: lw zero, 0(a1) -; RV32I-NEXT: addi a2, a2, 1 -; RV32I-NEXT: blt a2, a0, .LBB1_1 +; RV32I-NEXT: lw zero, 0(a2) +; RV32I-NEXT: addi a1, a1, 1 +; RV32I-NEXT: blt a1, a0, .LBB1_1 ; RV32I-NEXT: # %bb.2: # %ret ; RV32I-NEXT: ret ; ; RV64I-LABEL: test_la: ; RV64I: # %bb.0: # %entry +; RV64I-NEXT: li a1, 0 ; RV64I-NEXT: .Lpcrel_hi1: -; RV64I-NEXT: auipc a1, %got_pcrel_hi(g) -; RV64I-NEXT: ld a1, %pcrel_lo(.Lpcrel_hi1)(a1) -; RV64I-NEXT: li a2, 0 +; RV64I-NEXT: auipc a2, %got_pcrel_hi(g) +; RV64I-NEXT: ld a2, %pcrel_lo(.Lpcrel_hi1)(a2) ; RV64I-NEXT: .LBB1_1: # %loop ; RV64I-NEXT: # =>This Inner Loop Header: Depth=1 -; RV64I-NEXT: lw zero, 0(a1) -; RV64I-NEXT: addiw a2, a2, 1 -; RV64I-NEXT: blt a2, a0, .LBB1_1 +; RV64I-NEXT: lw zero, 0(a2) +; RV64I-NEXT: addiw a1, a1, 1 +; RV64I-NEXT: blt a1, a0, .LBB1_1 ; RV64I-NEXT: # %bb.2: # %ret ; RV64I-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll b/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll index d1b10af16063a..78b34452adef6 100644 --- a/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll +++ b/llvm/test/CodeGen/RISCV/macro-fusion-lui-addi.ll @@ -118,10 +118,9 @@ define void @test_regalloc_hint(i32 noundef signext %0, i32 noundef signext %1) ; ; FUSION-GENERIC-LABEL: test_regalloc_hint: ; FUSION-GENERIC: # %bb.0: -; FUSION-GENERIC-NEXT: lui a2, 3014 -; FUSION-GENERIC-NEXT: addiw a2, a2, 334 ; FUSION-GENERIC-NEXT: mv a0, a1 -; FUSION-GENERIC-NEXT: mv a1, a2 +; FUSION-GENERIC-NEXT: lui a1, 3014 +; FUSION-GENERIC-NEXT: addiw a1, a1, 334 ; FUSION-GENERIC-NEXT: tail bar tail call void @bar(i32 noundef signext %1, i32 noundef signext 12345678) ret void diff --git a/llvm/test/CodeGen/RISCV/mem.ll b/llvm/test/CodeGen/RISCV/mem.ll index a9cb80cb66349..cbfb63785661a 100644 --- a/llvm/test/CodeGen/RISCV/mem.ll +++ b/llvm/test/CodeGen/RISCV/mem.ll @@ -123,8 +123,9 @@ define dso_local i32 @load_sext_zext_anyext_i1(ptr %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lbu a1, 1(a0) ; RV32I-NEXT: lbu a2, 2(a0) +; RV32I-NEXT: sub a1, a2, a1 ; RV32I-NEXT: lbu zero, 0(a0) -; RV32I-NEXT: sub a0, a2, a1 +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret ; sextload i1 %1 = getelementptr i1, ptr %a, i32 1 @@ -145,8 +146,9 @@ define dso_local i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lbu a1, 1(a0) ; RV32I-NEXT: lbu a2, 2(a0) +; RV32I-NEXT: sub a1, a2, a1 ; RV32I-NEXT: lbu zero, 0(a0) -; RV32I-NEXT: sub a0, a2, a1 +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret ; sextload i1 %1 = getelementptr i1, ptr %a, i32 1 diff --git a/llvm/test/CodeGen/RISCV/mem64.ll b/llvm/test/CodeGen/RISCV/mem64.ll index 248964146325a..254a1f85faa00 100644 --- a/llvm/test/CodeGen/RISCV/mem64.ll +++ b/llvm/test/CodeGen/RISCV/mem64.ll @@ -168,8 +168,9 @@ define dso_local i64 @load_sext_zext_anyext_i1(ptr %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: lbu a1, 1(a0) ; RV64I-NEXT: lbu a2, 2(a0) +; RV64I-NEXT: sub a1, a2, a1 ; RV64I-NEXT: lbu zero, 0(a0) -; RV64I-NEXT: sub a0, a2, a1 +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret ; sextload 
i1 %1 = getelementptr i1, ptr %a, i32 1 @@ -190,8 +191,9 @@ define dso_local i16 @load_sext_zext_anyext_i1_i16(ptr %a) nounwind { ; RV64I: # %bb.0: ; RV64I-NEXT: lbu a1, 1(a0) ; RV64I-NEXT: lbu a2, 2(a0) +; RV64I-NEXT: sub a1, a2, a1 ; RV64I-NEXT: lbu zero, 0(a0) -; RV64I-NEXT: sub a0, a2, a1 +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret ; sextload i1 %1 = getelementptr i1, ptr %a, i32 1 diff --git a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll index f9086ba9d6354..6a63e80717623 100644 --- a/llvm/test/CodeGen/RISCV/memcmp-optsize.ll +++ b/llvm/test/CodeGen/RISCV/memcmp-optsize.ll @@ -2449,14 +2449,14 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV32-ZBB-LABEL: memcmp_size_3: ; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a2, 2(a0) -; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a0, 0(a0) -; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a3, 2(a1) -; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a1, 0(a1) -; CHECK-UNALIGNED-RV32-ZBB-NEXT: slli a2, a2, 16 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a0, a2 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: slli a3, a3, 16 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a1, a1, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a0, 2(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: slli a0, a0, 16 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a2, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a1, 2(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: slli a1, a1, 16 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a1, a2, a1 ; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a0, a1 @@ -2466,14 +2466,14 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_3: ; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a2, 2(a0) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a0, 0(a0) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a3, 2(a1) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a1, 0(a1) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a2, a2, 16 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a0, a2 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a3, a3, 16 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a1, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 2(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a0, a0, 16 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a2, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 2(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a1, a1, 16 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a2, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a0, a0, 32 @@ -2487,10 +2487,10 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a2, 0(a0) ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a0, 2(a0) -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a3, 0(a1) -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a1, 2(a1) ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: pack a0, a2, a0 -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: pack a1, a3, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a2, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a1, 2(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: pack a1, a2, a1 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a0, a1 @@ -2500,14 
+2500,14 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_3: ; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a2, 2(a0) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 0(a0) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a3, 2(a1) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 0(a1) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: slli a2, a2, 16 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a0, a2 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: slli a3, a3, 16 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a1, a1, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 2(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: slli a0, a0, 16 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a2, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 2(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: slli a1, a1, 16 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a1, a2, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a0, a0, 32 @@ -2835,14 +2835,14 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_5: ; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a2, 4(a0) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a0, 0(a0) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a3, 4(a1) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a1, 0(a1) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a2, a2, 32 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a0, a2 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a3, a3, 32 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a1, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a0, a0, 32 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a2, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a1, a1, 32 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a2, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1 @@ -2872,10 +2872,10 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a0) ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 4(a0) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a3, 0(a1) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1) ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a0, a2, a0 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a3, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a2, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1 @@ -3034,14 +3034,14 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { ; ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_6: ; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a2, 4(a0) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a0, 0(a0) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a3, 4(a1) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a1, 0(a1) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a2, a2, 32 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a0, a2 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a3, a3, 32 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a1, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a2, 0(a0) +; 
CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a0, a0, 32 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a2, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a1, a1, 32 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a2, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1 @@ -3077,10 +3077,10 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a0) ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 4(a0) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a3, 0(a1) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1) ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a0, a2, a0 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a3, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a2, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1 @@ -4410,104 +4410,104 @@ entry: define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-ALIGNED-RV32-LABEL: memcmp_eq_zero: ; CHECK-ALIGNED-RV32: # %bb.0: # %entry -; CHECK-ALIGNED-RV32-NEXT: lbu a2, 1(a1) -; CHECK-ALIGNED-RV32-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV32-NEXT: lbu a4, 3(a1) -; CHECK-ALIGNED-RV32-NEXT: lbu a1, 0(a1) -; CHECK-ALIGNED-RV32-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV32-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV32-NEXT: slli a4, a4, 24 -; CHECK-ALIGNED-RV32-NEXT: or a1, a2, a1 -; CHECK-ALIGNED-RV32-NEXT: lbu a2, 0(a0) -; CHECK-ALIGNED-RV32-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV32-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV32-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV32-NEXT: lbu a0, 3(a0) -; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV32-NEXT: or a2, a5, a2 +; CHECK-ALIGNED-RV32-NEXT: lbu a2, 0(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 2(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a1, 3(a1) +; CHECK-ALIGNED-RV32-NEXT: slli a3, a3, 8 ; CHECK-ALIGNED-RV32-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV32-NEXT: slli a1, a1, 24 +; CHECK-ALIGNED-RV32-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV32-NEXT: or a1, a1, a4 +; CHECK-ALIGNED-RV32-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a0, 3(a0) +; CHECK-ALIGNED-RV32-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV32-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 16 ; CHECK-ALIGNED-RV32-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV32-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV32-NEXT: or a1, a3, a1 -; CHECK-ALIGNED-RV32-NEXT: or a0, a0, a2 +; CHECK-ALIGNED-RV32-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV32-NEXT: or a1, a1, a2 +; CHECK-ALIGNED-RV32-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV32-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV32-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV32-NEXT: ret ; ; CHECK-ALIGNED-RV64-LABEL: memcmp_eq_zero: ; CHECK-ALIGNED-RV64: # %bb.0: # %entry -; CHECK-ALIGNED-RV64-NEXT: lbu a2, 1(a1) -; CHECK-ALIGNED-RV64-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV64-NEXT: lb a4, 3(a1) -; CHECK-ALIGNED-RV64-NEXT: lbu a1, 0(a1) -; CHECK-ALIGNED-RV64-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 24 -; CHECK-ALIGNED-RV64-NEXT: or a1, a2, a1 -; CHECK-ALIGNED-RV64-NEXT: 
lbu a2, 0(a0) -; CHECK-ALIGNED-RV64-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV64-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV64-NEXT: lb a0, 3(a0) -; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV64-NEXT: or a2, a5, a2 +; CHECK-ALIGNED-RV64-NEXT: lbu a2, 0(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 2(a1) +; CHECK-ALIGNED-RV64-NEXT: lb a1, 3(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 ; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a1, a1, 24 +; CHECK-ALIGNED-RV64-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV64-NEXT: or a1, a1, a4 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV64-NEXT: lb a0, 3(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 ; CHECK-ALIGNED-RV64-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV64-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV64-NEXT: or a1, a3, a1 -; CHECK-ALIGNED-RV64-NEXT: or a0, a0, a2 +; CHECK-ALIGNED-RV64-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV64-NEXT: or a1, a1, a2 +; CHECK-ALIGNED-RV64-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV64-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV64-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV64-NEXT: ret ; ; CHECK-ALIGNED-RV32-ZBB-LABEL: memcmp_eq_zero: ; CHECK-ALIGNED-RV32-ZBB: # %bb.0: # %entry -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a2, 1(a1) -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a4, 3(a1) -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a1, 0(a1) -; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a4, a4, 24 -; CHECK-ALIGNED-RV32-ZBB-NEXT: or a1, a2, a1 -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a2, 0(a0) -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV32-ZBB-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a0, 3(a0) -; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV32-ZBB-NEXT: or a2, a5, a2 +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a2, 0(a1) +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a4, 2(a1) +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a1, 3(a1) +; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a3, a3, 8 ; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a1, a1, 24 +; CHECK-ALIGNED-RV32-ZBB-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV32-ZBB-NEXT: or a1, a1, a4 +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a0, 3(a0) +; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV32-ZBB-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a5, a5, 16 ; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV32-ZBB-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV32-ZBB-NEXT: or a1, a3, a1 -; CHECK-ALIGNED-RV32-ZBB-NEXT: or a0, a0, a2 +; CHECK-ALIGNED-RV32-ZBB-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV32-ZBB-NEXT: or a1, a1, a2 +; CHECK-ALIGNED-RV32-ZBB-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV32-ZBB-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV32-ZBB-NEXT: ret ; ; CHECK-ALIGNED-RV64-ZBB-LABEL: memcmp_eq_zero: ; CHECK-ALIGNED-RV64-ZBB: # %bb.0: # %entry -; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a2, 1(a1) -; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV64-ZBB-NEXT: lb a4, 
3(a1) -; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a1, 0(a1) -; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a4, a4, 24 -; CHECK-ALIGNED-RV64-ZBB-NEXT: or a1, a2, a1 -; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a2, 0(a0) -; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV64-ZBB-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV64-ZBB-NEXT: lb a0, 3(a0) -; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV64-ZBB-NEXT: or a2, a5, a2 +; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a2, 0(a1) +; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a4, 2(a1) +; CHECK-ALIGNED-RV64-ZBB-NEXT: lb a1, 3(a1) +; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a3, a3, 8 ; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a1, a1, 24 +; CHECK-ALIGNED-RV64-ZBB-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV64-ZBB-NEXT: or a1, a1, a4 +; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV64-ZBB-NEXT: lb a0, 3(a0) +; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-ZBB-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a5, a5, 16 ; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV64-ZBB-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV64-ZBB-NEXT: or a1, a3, a1 -; CHECK-ALIGNED-RV64-ZBB-NEXT: or a0, a0, a2 +; CHECK-ALIGNED-RV64-ZBB-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV64-ZBB-NEXT: or a1, a1, a2 +; CHECK-ALIGNED-RV64-ZBB-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV64-ZBB-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV64-ZBB-NEXT: ret @@ -4518,16 +4518,16 @@ define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a3, 1(a1) ; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a4, 2(a1) ; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a1, 3(a1) -; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a6, 2(a0) -; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a7, 3(a0) -; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a0, 0(a0) ; CHECK-ALIGNED-RV32-ZBKB-NEXT: packh a1, a4, a1 ; CHECK-ALIGNED-RV32-ZBKB-NEXT: packh a2, a2, a3 -; CHECK-ALIGNED-RV32-ZBKB-NEXT: packh a3, a6, a7 -; CHECK-ALIGNED-RV32-ZBKB-NEXT: packh a0, a0, a5 +; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a0, 3(a0) +; CHECK-ALIGNED-RV32-ZBKB-NEXT: packh a0, a5, a0 +; CHECK-ALIGNED-RV32-ZBKB-NEXT: packh a3, a3, a4 ; CHECK-ALIGNED-RV32-ZBKB-NEXT: pack a1, a2, a1 -; CHECK-ALIGNED-RV32-ZBKB-NEXT: pack a0, a0, a3 +; CHECK-ALIGNED-RV32-ZBKB-NEXT: pack a0, a3, a0 ; CHECK-ALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV32-ZBKB-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV32-ZBKB-NEXT: ret @@ -4538,72 +4538,72 @@ define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind optsize { ; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a3, 1(a1) ; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a4, 2(a1) ; CHECK-ALIGNED-RV64-ZBKB-NEXT: lb a1, 3(a1) -; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a5, 0(a0) -; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a6, 1(a0) ; CHECK-ALIGNED-RV64-ZBKB-NEXT: packh a2, a2, a3 -; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a3, 2(a0) +; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a5, 1(a0) +; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a6, 2(a0) ; CHECK-ALIGNED-RV64-ZBKB-NEXT: lb a0, 3(a0) -; 
CHECK-ALIGNED-RV64-ZBKB-NEXT: packh a5, a5, a6 +; CHECK-ALIGNED-RV64-ZBKB-NEXT: packh a3, a3, a5 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: slli a4, a4, 16 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: slli a1, a1, 24 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: or a1, a1, a4 -; CHECK-ALIGNED-RV64-ZBKB-NEXT: slli a3, a3, 16 +; CHECK-ALIGNED-RV64-ZBKB-NEXT: slli a6, a6, 16 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV64-ZBKB-NEXT: or a0, a0, a3 +; CHECK-ALIGNED-RV64-ZBKB-NEXT: or a0, a0, a6 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: or a1, a1, a2 -; CHECK-ALIGNED-RV64-ZBKB-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV64-ZBKB-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: ret ; ; CHECK-ALIGNED-RV32-V-LABEL: memcmp_eq_zero: ; CHECK-ALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 1(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 3(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a1, 0(a1) -; CHECK-ALIGNED-RV32-V-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV32-V-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 24 -; CHECK-ALIGNED-RV32-V-NEXT: or a1, a2, a1 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 0(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a0, 3(a0) -; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV32-V-NEXT: or a2, a5, a2 +; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 0(a1) +; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a1) +; CHECK-ALIGNED-RV32-V-NEXT: lbu a1, 3(a1) +; CHECK-ALIGNED-RV32-V-NEXT: slli a3, a3, 8 ; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV32-V-NEXT: slli a1, a1, 24 +; CHECK-ALIGNED-RV32-V-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV32-V-NEXT: or a1, a1, a4 +; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV32-V-NEXT: lbu a0, 3(a0) +; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 16 ; CHECK-ALIGNED-RV32-V-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV32-V-NEXT: or a1, a3, a1 -; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a2 +; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV32-V-NEXT: or a1, a1, a2 +; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV32-V-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV32-V-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV32-V-NEXT: ret ; ; CHECK-ALIGNED-RV64-V-LABEL: memcmp_eq_zero: ; CHECK-ALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-ALIGNED-RV64-V-NEXT: lbu a2, 1(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lb a4, 3(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a1, 0(a1) -; CHECK-ALIGNED-RV64-V-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV64-V-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 24 -; CHECK-ALIGNED-RV64-V-NEXT: or a1, a2, a1 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a2, 0(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV64-V-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lb a0, 3(a0) -; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV64-V-NEXT: or a2, a5, a2 +; CHECK-ALIGNED-RV64-V-NEXT: lbu a2, 0(a1) +; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 2(a1) +; 
CHECK-ALIGNED-RV64-V-NEXT: lb a1, 3(a1) +; CHECK-ALIGNED-RV64-V-NEXT: slli a3, a3, 8 ; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV64-V-NEXT: slli a1, a1, 24 +; CHECK-ALIGNED-RV64-V-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV64-V-NEXT: or a1, a1, a4 +; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV64-V-NEXT: lb a0, 3(a0) +; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-V-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 16 ; CHECK-ALIGNED-RV64-V-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV64-V-NEXT: or a1, a3, a1 -; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a2 +; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV64-V-NEXT: or a1, a1, a2 +; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV64-V-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV64-V-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV64-V-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/memcmp.ll b/llvm/test/CodeGen/RISCV/memcmp.ll index f0290298e362a..ec83f16682296 100644 --- a/llvm/test/CodeGen/RISCV/memcmp.ll +++ b/llvm/test/CodeGen/RISCV/memcmp.ll @@ -3355,14 +3355,14 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind { ; ; CHECK-UNALIGNED-RV32-ZBB-LABEL: memcmp_size_3: ; CHECK-UNALIGNED-RV32-ZBB: # %bb.0: # %entry -; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a2, 2(a0) -; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a0, 0(a0) -; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a3, 2(a1) -; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a1, 0(a1) -; CHECK-UNALIGNED-RV32-ZBB-NEXT: slli a2, a2, 16 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a0, a2 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: slli a3, a3, 16 -; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a1, a1, a3 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a2, 0(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a0, 2(a0) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: slli a0, a0, 16 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lhu a2, 0(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: lbu a1, 2(a1) +; CHECK-UNALIGNED-RV32-ZBB-NEXT: slli a1, a1, 16 +; CHECK-UNALIGNED-RV32-ZBB-NEXT: or a1, a2, a1 ; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV32-ZBB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV32-ZBB-NEXT: sltu a2, a0, a1 @@ -3372,14 +3372,14 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind { ; ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_3: ; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a2, 2(a0) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a0, 0(a0) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a3, 2(a1) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a1, 0(a1) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a2, a2, 16 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a0, a2 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a3, a3, 16 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a1, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 2(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a0, a0, 16 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a2, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 2(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a1, a1, 16 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a2, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: srli a0, a0, 32 @@ -3393,10 +3393,10 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind { ; CHECK-UNALIGNED-RV32-ZBKB: # %bb.0: # %entry ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu 
a2, 0(a0) ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a0, 2(a0) -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a3, 0(a1) -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a1, 2(a1) ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: pack a0, a2, a0 -; CHECK-UNALIGNED-RV32-ZBKB-NEXT: pack a1, a3, a1 +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lhu a2, 0(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: lbu a1, 2(a1) +; CHECK-UNALIGNED-RV32-ZBKB-NEXT: pack a1, a2, a1 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV32-ZBKB-NEXT: sltu a2, a0, a1 @@ -3406,14 +3406,14 @@ define i32 @memcmp_size_3(ptr %s1, ptr %s2) nounwind { ; ; CHECK-UNALIGNED-RV64-ZBKB-LABEL: memcmp_size_3: ; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a2, 2(a0) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 0(a0) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a3, 2(a1) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 0(a1) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: slli a2, a2, 16 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a0, a2 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: slli a3, a3, 16 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a1, a1, a3 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 2(a0) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: slli a0, a0, 16 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a2, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 2(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: slli a1, a1, 16 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: or a1, a2, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: srli a0, a0, 32 @@ -3741,14 +3741,14 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind { ; ; CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_5: ; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a2, 4(a0) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a0, 0(a0) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a3, 4(a1) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a1, 0(a1) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a2, a2, 32 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a0, a2 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a3, a3, 32 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a1, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a0, a0, 32 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a2, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a1, a1, 32 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a2, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1 @@ -3778,10 +3778,10 @@ define i32 @memcmp_size_5(ptr %s1, ptr %s2) nounwind { ; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a0) ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a0, 4(a0) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a3, 0(a1) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1) ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a0, a2, a0 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a3, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lbu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a2, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1 @@ -3940,14 +3940,14 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind { ; ; 
CHECK-UNALIGNED-RV64-ZBB-LABEL: memcmp_size_6: ; CHECK-UNALIGNED-RV64-ZBB: # %bb.0: # %entry -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a2, 4(a0) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a0, 0(a0) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a3, 4(a1) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a1, 0(a1) -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a2, a2, 32 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a0, a2 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a3, a3, 32 -; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a1, a3 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a2, 0(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a0, 4(a0) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a0, a0, 32 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a0, a2, a0 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lwu a2, 0(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBB-NEXT: slli a1, a1, 32 +; CHECK-UNALIGNED-RV64-ZBB-NEXT: or a1, a2, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBB-NEXT: sltu a2, a0, a1 @@ -3983,10 +3983,10 @@ define i32 @memcmp_size_6(ptr %s1, ptr %s2) nounwind { ; CHECK-UNALIGNED-RV64-ZBKB: # %bb.0: # %entry ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a0) ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a0, 4(a0) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a3, 0(a1) -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1) ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a0, a2, a0 -; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a3, a1 +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lwu a2, 0(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: lhu a1, 4(a1) +; CHECK-UNALIGNED-RV64-ZBKB-NEXT: pack a1, a2, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a0, a0 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: rev8 a1, a1 ; CHECK-UNALIGNED-RV64-ZBKB-NEXT: sltu a2, a0, a1 @@ -5980,104 +5980,104 @@ entry: define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind { ; CHECK-ALIGNED-RV32-LABEL: memcmp_eq_zero: ; CHECK-ALIGNED-RV32: # %bb.0: # %entry -; CHECK-ALIGNED-RV32-NEXT: lbu a2, 1(a1) -; CHECK-ALIGNED-RV32-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV32-NEXT: lbu a4, 3(a1) -; CHECK-ALIGNED-RV32-NEXT: lbu a1, 0(a1) -; CHECK-ALIGNED-RV32-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV32-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV32-NEXT: slli a4, a4, 24 -; CHECK-ALIGNED-RV32-NEXT: or a1, a2, a1 -; CHECK-ALIGNED-RV32-NEXT: lbu a2, 0(a0) -; CHECK-ALIGNED-RV32-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV32-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV32-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV32-NEXT: lbu a0, 3(a0) -; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV32-NEXT: or a2, a5, a2 +; CHECK-ALIGNED-RV32-NEXT: lbu a2, 0(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 2(a1) +; CHECK-ALIGNED-RV32-NEXT: lbu a1, 3(a1) +; CHECK-ALIGNED-RV32-NEXT: slli a3, a3, 8 ; CHECK-ALIGNED-RV32-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV32-NEXT: slli a1, a1, 24 +; CHECK-ALIGNED-RV32-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV32-NEXT: or a1, a1, a4 +; CHECK-ALIGNED-RV32-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV32-NEXT: lbu a0, 3(a0) +; CHECK-ALIGNED-RV32-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV32-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV32-NEXT: slli a5, a5, 16 ; CHECK-ALIGNED-RV32-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV32-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV32-NEXT: or a1, a3, a1 -; CHECK-ALIGNED-RV32-NEXT: or a0, a0, a2 +; CHECK-ALIGNED-RV32-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV32-NEXT: or a1, a1, a2 +; CHECK-ALIGNED-RV32-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV32-NEXT: xor a0, a0, a1 ; 
CHECK-ALIGNED-RV32-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV32-NEXT: ret ; ; CHECK-ALIGNED-RV64-LABEL: memcmp_eq_zero: ; CHECK-ALIGNED-RV64: # %bb.0: # %entry -; CHECK-ALIGNED-RV64-NEXT: lbu a2, 1(a1) -; CHECK-ALIGNED-RV64-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV64-NEXT: lb a4, 3(a1) -; CHECK-ALIGNED-RV64-NEXT: lbu a1, 0(a1) -; CHECK-ALIGNED-RV64-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 24 -; CHECK-ALIGNED-RV64-NEXT: or a1, a2, a1 -; CHECK-ALIGNED-RV64-NEXT: lbu a2, 0(a0) -; CHECK-ALIGNED-RV64-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV64-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV64-NEXT: lb a0, 3(a0) -; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV64-NEXT: or a2, a5, a2 +; CHECK-ALIGNED-RV64-NEXT: lbu a2, 0(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 2(a1) +; CHECK-ALIGNED-RV64-NEXT: lb a1, 3(a1) +; CHECK-ALIGNED-RV64-NEXT: slli a3, a3, 8 ; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV64-NEXT: slli a1, a1, 24 +; CHECK-ALIGNED-RV64-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV64-NEXT: or a1, a1, a4 +; CHECK-ALIGNED-RV64-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV64-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV64-NEXT: lb a0, 3(a0) +; CHECK-ALIGNED-RV64-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-NEXT: slli a5, a5, 16 ; CHECK-ALIGNED-RV64-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV64-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV64-NEXT: or a1, a3, a1 -; CHECK-ALIGNED-RV64-NEXT: or a0, a0, a2 +; CHECK-ALIGNED-RV64-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV64-NEXT: or a1, a1, a2 +; CHECK-ALIGNED-RV64-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV64-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV64-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV64-NEXT: ret ; ; CHECK-ALIGNED-RV32-ZBB-LABEL: memcmp_eq_zero: ; CHECK-ALIGNED-RV32-ZBB: # %bb.0: # %entry -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a2, 1(a1) -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a4, 3(a1) -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a1, 0(a1) -; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a4, a4, 24 -; CHECK-ALIGNED-RV32-ZBB-NEXT: or a1, a2, a1 -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a2, 0(a0) -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV32-ZBB-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a0, 3(a0) -; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV32-ZBB-NEXT: or a2, a5, a2 +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a2, 0(a1) +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a4, 2(a1) +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a1, 3(a1) +; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a3, a3, 8 ; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a1, a1, 24 +; CHECK-ALIGNED-RV32-ZBB-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV32-ZBB-NEXT: or a1, a1, a4 +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV32-ZBB-NEXT: lbu a0, 3(a0) +; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV32-ZBB-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a5, a5, 16 ; CHECK-ALIGNED-RV32-ZBB-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV32-ZBB-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV32-ZBB-NEXT: or a1, a3, a1 -; 
CHECK-ALIGNED-RV32-ZBB-NEXT: or a0, a0, a2 +; CHECK-ALIGNED-RV32-ZBB-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV32-ZBB-NEXT: or a1, a1, a2 +; CHECK-ALIGNED-RV32-ZBB-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV32-ZBB-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV32-ZBB-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV32-ZBB-NEXT: ret ; ; CHECK-ALIGNED-RV64-ZBB-LABEL: memcmp_eq_zero: ; CHECK-ALIGNED-RV64-ZBB: # %bb.0: # %entry -; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a2, 1(a1) -; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV64-ZBB-NEXT: lb a4, 3(a1) -; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a1, 0(a1) -; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a4, a4, 24 -; CHECK-ALIGNED-RV64-ZBB-NEXT: or a1, a2, a1 -; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a2, 0(a0) -; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV64-ZBB-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV64-ZBB-NEXT: lb a0, 3(a0) -; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV64-ZBB-NEXT: or a2, a5, a2 +; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a2, 0(a1) +; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a4, 2(a1) +; CHECK-ALIGNED-RV64-ZBB-NEXT: lb a1, 3(a1) +; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a3, a3, 8 ; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a1, a1, 24 +; CHECK-ALIGNED-RV64-ZBB-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV64-ZBB-NEXT: or a1, a1, a4 +; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV64-ZBB-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV64-ZBB-NEXT: lb a0, 3(a0) +; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-ZBB-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a5, a5, 16 ; CHECK-ALIGNED-RV64-ZBB-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV64-ZBB-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV64-ZBB-NEXT: or a1, a3, a1 -; CHECK-ALIGNED-RV64-ZBB-NEXT: or a0, a0, a2 +; CHECK-ALIGNED-RV64-ZBB-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV64-ZBB-NEXT: or a1, a1, a2 +; CHECK-ALIGNED-RV64-ZBB-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV64-ZBB-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV64-ZBB-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV64-ZBB-NEXT: ret @@ -6088,16 +6088,16 @@ define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind { ; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a3, 1(a1) ; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a4, 2(a1) ; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a1, 3(a1) -; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a6, 2(a0) -; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a7, 3(a0) -; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a0, 0(a0) ; CHECK-ALIGNED-RV32-ZBKB-NEXT: packh a1, a4, a1 ; CHECK-ALIGNED-RV32-ZBKB-NEXT: packh a2, a2, a3 -; CHECK-ALIGNED-RV32-ZBKB-NEXT: packh a3, a6, a7 -; CHECK-ALIGNED-RV32-ZBKB-NEXT: packh a0, a0, a5 +; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV32-ZBKB-NEXT: lbu a0, 3(a0) +; CHECK-ALIGNED-RV32-ZBKB-NEXT: packh a0, a5, a0 +; CHECK-ALIGNED-RV32-ZBKB-NEXT: packh a3, a3, a4 ; CHECK-ALIGNED-RV32-ZBKB-NEXT: pack a1, a2, a1 -; CHECK-ALIGNED-RV32-ZBKB-NEXT: pack a0, a0, a3 +; CHECK-ALIGNED-RV32-ZBKB-NEXT: pack a0, a3, a0 ; CHECK-ALIGNED-RV32-ZBKB-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV32-ZBKB-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV32-ZBKB-NEXT: ret @@ -6108,72 +6108,72 @@ define i1 @memcmp_eq_zero(ptr %s1, ptr %s2) nounwind { ; 
CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a3, 1(a1) ; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a4, 2(a1) ; CHECK-ALIGNED-RV64-ZBKB-NEXT: lb a1, 3(a1) -; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a5, 0(a0) -; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a6, 1(a0) ; CHECK-ALIGNED-RV64-ZBKB-NEXT: packh a2, a2, a3 -; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a3, 2(a0) +; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a5, 1(a0) +; CHECK-ALIGNED-RV64-ZBKB-NEXT: lbu a6, 2(a0) ; CHECK-ALIGNED-RV64-ZBKB-NEXT: lb a0, 3(a0) -; CHECK-ALIGNED-RV64-ZBKB-NEXT: packh a5, a5, a6 +; CHECK-ALIGNED-RV64-ZBKB-NEXT: packh a3, a3, a5 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: slli a4, a4, 16 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: slli a1, a1, 24 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: or a1, a1, a4 -; CHECK-ALIGNED-RV64-ZBKB-NEXT: slli a3, a3, 16 +; CHECK-ALIGNED-RV64-ZBKB-NEXT: slli a6, a6, 16 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV64-ZBKB-NEXT: or a0, a0, a3 +; CHECK-ALIGNED-RV64-ZBKB-NEXT: or a0, a0, a6 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: or a1, a1, a2 -; CHECK-ALIGNED-RV64-ZBKB-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV64-ZBKB-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV64-ZBKB-NEXT: ret ; ; CHECK-ALIGNED-RV32-V-LABEL: memcmp_eq_zero: ; CHECK-ALIGNED-RV32-V: # %bb.0: # %entry -; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 1(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 3(a1) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a1, 0(a1) -; CHECK-ALIGNED-RV32-V-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV32-V-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 24 -; CHECK-ALIGNED-RV32-V-NEXT: or a1, a2, a1 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 0(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV32-V-NEXT: lbu a0, 3(a0) -; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV32-V-NEXT: or a2, a5, a2 +; CHECK-ALIGNED-RV32-V-NEXT: lbu a2, 0(a1) +; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 2(a1) +; CHECK-ALIGNED-RV32-V-NEXT: lbu a1, 3(a1) +; CHECK-ALIGNED-RV32-V-NEXT: slli a3, a3, 8 ; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV32-V-NEXT: slli a1, a1, 24 +; CHECK-ALIGNED-RV32-V-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV32-V-NEXT: or a1, a1, a4 +; CHECK-ALIGNED-RV32-V-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV32-V-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV32-V-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV32-V-NEXT: lbu a0, 3(a0) +; CHECK-ALIGNED-RV32-V-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV32-V-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV32-V-NEXT: slli a5, a5, 16 ; CHECK-ALIGNED-RV32-V-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV32-V-NEXT: or a1, a3, a1 -; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a2 +; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV32-V-NEXT: or a1, a1, a2 +; CHECK-ALIGNED-RV32-V-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV32-V-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV32-V-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV32-V-NEXT: ret ; ; CHECK-ALIGNED-RV64-V-LABEL: memcmp_eq_zero: ; CHECK-ALIGNED-RV64-V: # %bb.0: # %entry -; CHECK-ALIGNED-RV64-V-NEXT: lbu a2, 1(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 2(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lb a4, 3(a1) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a1, 0(a1) -; CHECK-ALIGNED-RV64-V-NEXT: slli a2, a2, 8 -; CHECK-ALIGNED-RV64-V-NEXT: slli a3, a3, 16 -; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 
24 -; CHECK-ALIGNED-RV64-V-NEXT: or a1, a2, a1 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a2, 0(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 1(a0) -; CHECK-ALIGNED-RV64-V-NEXT: or a3, a4, a3 -; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 2(a0) -; CHECK-ALIGNED-RV64-V-NEXT: lb a0, 3(a0) -; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 8 -; CHECK-ALIGNED-RV64-V-NEXT: or a2, a5, a2 +; CHECK-ALIGNED-RV64-V-NEXT: lbu a2, 0(a1) +; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 1(a1) +; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 2(a1) +; CHECK-ALIGNED-RV64-V-NEXT: lb a1, 3(a1) +; CHECK-ALIGNED-RV64-V-NEXT: slli a3, a3, 8 ; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 16 +; CHECK-ALIGNED-RV64-V-NEXT: slli a1, a1, 24 +; CHECK-ALIGNED-RV64-V-NEXT: or a2, a3, a2 +; CHECK-ALIGNED-RV64-V-NEXT: or a1, a1, a4 +; CHECK-ALIGNED-RV64-V-NEXT: lbu a3, 0(a0) +; CHECK-ALIGNED-RV64-V-NEXT: lbu a4, 1(a0) +; CHECK-ALIGNED-RV64-V-NEXT: lbu a5, 2(a0) +; CHECK-ALIGNED-RV64-V-NEXT: lb a0, 3(a0) +; CHECK-ALIGNED-RV64-V-NEXT: slli a4, a4, 8 +; CHECK-ALIGNED-RV64-V-NEXT: or a3, a4, a3 +; CHECK-ALIGNED-RV64-V-NEXT: slli a5, a5, 16 ; CHECK-ALIGNED-RV64-V-NEXT: slli a0, a0, 24 -; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a4 -; CHECK-ALIGNED-RV64-V-NEXT: or a1, a3, a1 -; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a2 +; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a5 +; CHECK-ALIGNED-RV64-V-NEXT: or a1, a1, a2 +; CHECK-ALIGNED-RV64-V-NEXT: or a0, a0, a3 ; CHECK-ALIGNED-RV64-V-NEXT: xor a0, a0, a1 ; CHECK-ALIGNED-RV64-V-NEXT: seqz a0, a0 ; CHECK-ALIGNED-RV64-V-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/memmove.ll b/llvm/test/CodeGen/RISCV/memmove.ll index 62915bd4ad99d..4795d2c6a5209 100644 --- a/llvm/test/CodeGen/RISCV/memmove.ll +++ b/llvm/test/CodeGen/RISCV/memmove.ll @@ -159,38 +159,38 @@ entry: define void @unaligned_memmove7(ptr nocapture %dest, ptr %src) nounwind { ; RV32-LABEL: unaligned_memmove7: ; RV32: # %bb.0: # %entry -; RV32-NEXT: lbu a2, 4(a1) -; RV32-NEXT: lbu a3, 5(a1) -; RV32-NEXT: lbu a4, 6(a1) -; RV32-NEXT: lbu a5, 0(a1) -; RV32-NEXT: lbu a6, 1(a1) -; RV32-NEXT: lbu a7, 2(a1) -; RV32-NEXT: lbu a1, 3(a1) -; RV32-NEXT: sb a2, 4(a0) -; RV32-NEXT: sb a3, 5(a0) -; RV32-NEXT: sb a4, 6(a0) -; RV32-NEXT: sb a5, 0(a0) -; RV32-NEXT: sb a6, 1(a0) -; RV32-NEXT: sb a7, 2(a0) -; RV32-NEXT: sb a1, 3(a0) +; RV32-NEXT: lbu a2, 0(a1) +; RV32-NEXT: lbu a3, 1(a1) +; RV32-NEXT: lbu a4, 2(a1) +; RV32-NEXT: lbu a5, 3(a1) +; RV32-NEXT: lbu a6, 4(a1) +; RV32-NEXT: lbu a7, 5(a1) +; RV32-NEXT: lbu a1, 6(a1) +; RV32-NEXT: sb a6, 4(a0) +; RV32-NEXT: sb a7, 5(a0) +; RV32-NEXT: sb a1, 6(a0) +; RV32-NEXT: sb a2, 0(a0) +; RV32-NEXT: sb a3, 1(a0) +; RV32-NEXT: sb a4, 2(a0) +; RV32-NEXT: sb a5, 3(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: unaligned_memmove7: ; RV64: # %bb.0: # %entry -; RV64-NEXT: lbu a2, 4(a1) -; RV64-NEXT: lbu a3, 5(a1) -; RV64-NEXT: lbu a4, 6(a1) -; RV64-NEXT: lbu a5, 0(a1) -; RV64-NEXT: lbu a6, 1(a1) -; RV64-NEXT: lbu a7, 2(a1) -; RV64-NEXT: lbu a1, 3(a1) -; RV64-NEXT: sb a2, 4(a0) -; RV64-NEXT: sb a3, 5(a0) -; RV64-NEXT: sb a4, 6(a0) -; RV64-NEXT: sb a5, 0(a0) -; RV64-NEXT: sb a6, 1(a0) -; RV64-NEXT: sb a7, 2(a0) -; RV64-NEXT: sb a1, 3(a0) +; RV64-NEXT: lbu a2, 0(a1) +; RV64-NEXT: lbu a3, 1(a1) +; RV64-NEXT: lbu a4, 2(a1) +; RV64-NEXT: lbu a5, 3(a1) +; RV64-NEXT: lbu a6, 4(a1) +; RV64-NEXT: lbu a7, 5(a1) +; RV64-NEXT: lbu a1, 6(a1) +; RV64-NEXT: sb a6, 4(a0) +; RV64-NEXT: sb a7, 5(a0) +; RV64-NEXT: sb a1, 6(a0) +; RV64-NEXT: sb a2, 0(a0) +; RV64-NEXT: sb a3, 1(a0) +; RV64-NEXT: sb a4, 2(a0) +; RV64-NEXT: sb a5, 3(a0) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: unaligned_memmove7: @@ -289,16 
+289,16 @@ define void @unaligned_memmove15(ptr nocapture %dest, ptr %src) nounwind { ; ; RV32-FAST-LABEL: unaligned_memmove15: ; RV32-FAST: # %bb.0: # %entry -; RV32-FAST-NEXT: lbu a2, 14(a1) -; RV32-FAST-NEXT: lw a3, 0(a1) -; RV32-FAST-NEXT: lw a4, 4(a1) -; RV32-FAST-NEXT: lw a5, 8(a1) -; RV32-FAST-NEXT: lh a1, 12(a1) -; RV32-FAST-NEXT: sb a2, 14(a0) -; RV32-FAST-NEXT: sw a3, 0(a0) -; RV32-FAST-NEXT: sw a4, 4(a0) -; RV32-FAST-NEXT: sw a5, 8(a0) -; RV32-FAST-NEXT: sh a1, 12(a0) +; RV32-FAST-NEXT: lw a2, 0(a1) +; RV32-FAST-NEXT: lw a3, 4(a1) +; RV32-FAST-NEXT: lw a4, 8(a1) +; RV32-FAST-NEXT: lh a5, 12(a1) +; RV32-FAST-NEXT: lbu a1, 14(a1) +; RV32-FAST-NEXT: sb a1, 14(a0) +; RV32-FAST-NEXT: sw a2, 0(a0) +; RV32-FAST-NEXT: sw a3, 4(a0) +; RV32-FAST-NEXT: sw a4, 8(a0) +; RV32-FAST-NEXT: sh a5, 12(a0) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: unaligned_memmove15: @@ -365,18 +365,18 @@ define void @unaligned_memmove31(ptr nocapture %dest, ptr %src) nounwind { ; ; RV64-FAST-LABEL: unaligned_memmove31: ; RV64-FAST: # %bb.0: # %entry -; RV64-FAST-NEXT: lh a2, 28(a1) -; RV64-FAST-NEXT: lbu a3, 30(a1) -; RV64-FAST-NEXT: ld a4, 0(a1) -; RV64-FAST-NEXT: ld a5, 8(a1) -; RV64-FAST-NEXT: ld a6, 16(a1) -; RV64-FAST-NEXT: lw a1, 24(a1) -; RV64-FAST-NEXT: sh a2, 28(a0) -; RV64-FAST-NEXT: sb a3, 30(a0) -; RV64-FAST-NEXT: sd a4, 0(a0) -; RV64-FAST-NEXT: sd a5, 8(a0) -; RV64-FAST-NEXT: sd a6, 16(a0) -; RV64-FAST-NEXT: sw a1, 24(a0) +; RV64-FAST-NEXT: ld a2, 0(a1) +; RV64-FAST-NEXT: ld a3, 8(a1) +; RV64-FAST-NEXT: ld a4, 16(a1) +; RV64-FAST-NEXT: lw a5, 24(a1) +; RV64-FAST-NEXT: lh a6, 28(a1) +; RV64-FAST-NEXT: lbu a1, 30(a1) +; RV64-FAST-NEXT: sh a6, 28(a0) +; RV64-FAST-NEXT: sb a1, 30(a0) +; RV64-FAST-NEXT: sd a2, 0(a0) +; RV64-FAST-NEXT: sd a3, 8(a0) +; RV64-FAST-NEXT: sd a4, 16(a0) +; RV64-FAST-NEXT: sw a5, 24(a0) ; RV64-FAST-NEXT: ret entry: tail call void @llvm.memmove.p0.p0.i64(ptr %dest, ptr %src, i64 31, i1 false) @@ -579,18 +579,18 @@ define void @aligned_memmove31(ptr nocapture %dest, ptr %src) nounwind { ; ; RV64-BOTH-LABEL: aligned_memmove31: ; RV64-BOTH: # %bb.0: # %entry -; RV64-BOTH-NEXT: lh a2, 28(a1) -; RV64-BOTH-NEXT: lbu a3, 30(a1) -; RV64-BOTH-NEXT: ld a4, 0(a1) -; RV64-BOTH-NEXT: ld a5, 8(a1) -; RV64-BOTH-NEXT: ld a6, 16(a1) -; RV64-BOTH-NEXT: lw a1, 24(a1) -; RV64-BOTH-NEXT: sh a2, 28(a0) -; RV64-BOTH-NEXT: sb a3, 30(a0) -; RV64-BOTH-NEXT: sd a4, 0(a0) -; RV64-BOTH-NEXT: sd a5, 8(a0) -; RV64-BOTH-NEXT: sd a6, 16(a0) -; RV64-BOTH-NEXT: sw a1, 24(a0) +; RV64-BOTH-NEXT: ld a2, 0(a1) +; RV64-BOTH-NEXT: ld a3, 8(a1) +; RV64-BOTH-NEXT: ld a4, 16(a1) +; RV64-BOTH-NEXT: lw a5, 24(a1) +; RV64-BOTH-NEXT: lh a6, 28(a1) +; RV64-BOTH-NEXT: lbu a1, 30(a1) +; RV64-BOTH-NEXT: sh a6, 28(a0) +; RV64-BOTH-NEXT: sb a1, 30(a0) +; RV64-BOTH-NEXT: sd a2, 0(a0) +; RV64-BOTH-NEXT: sd a3, 8(a0) +; RV64-BOTH-NEXT: sd a4, 16(a0) +; RV64-BOTH-NEXT: sw a5, 24(a0) ; RV64-BOTH-NEXT: ret entry: tail call void @llvm.memmove.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 31, i1 false) diff --git a/llvm/test/CodeGen/RISCV/memset-pattern.ll b/llvm/test/CodeGen/RISCV/memset-pattern.ll index 35ce7fad0ea67..3b80c5684bfd0 100644 --- a/llvm/test/CodeGen/RISCV/memset-pattern.ll +++ b/llvm/test/CodeGen/RISCV/memset-pattern.ll @@ -15,24 +15,24 @@ define void @memset_1(ptr %a, i128 %value) nounwind { ; RV32-BOTH-LABEL: memset_1: ; RV32-BOTH: # %bb.0: # %loadstoreloop.preheader ; RV32-BOTH-NEXT: li a2, 0 -; RV32-BOTH-NEXT: lw a3, 0(a1) -; RV32-BOTH-NEXT: lw a4, 4(a1) -; RV32-BOTH-NEXT: lw a5, 8(a1) +; RV32-BOTH-NEXT: li a3, 0 +; 
RV32-BOTH-NEXT: lw a4, 0(a1) +; RV32-BOTH-NEXT: lw a5, 4(a1) +; RV32-BOTH-NEXT: lw a6, 8(a1) ; RV32-BOTH-NEXT: lw a1, 12(a1) -; RV32-BOTH-NEXT: li a6, 0 ; RV32-BOTH-NEXT: .LBB0_1: # %loadstoreloop ; RV32-BOTH-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-BOTH-NEXT: slli a7, a2, 4 ; RV32-BOTH-NEXT: addi a2, a2, 1 ; RV32-BOTH-NEXT: add a7, a0, a7 -; RV32-BOTH-NEXT: seqz t0, a2 -; RV32-BOTH-NEXT: add a6, a6, t0 -; RV32-BOTH-NEXT: or t0, a2, a6 -; RV32-BOTH-NEXT: sw a3, 0(a7) -; RV32-BOTH-NEXT: sw a4, 4(a7) -; RV32-BOTH-NEXT: sw a5, 8(a7) +; RV32-BOTH-NEXT: sw a4, 0(a7) +; RV32-BOTH-NEXT: sw a5, 4(a7) +; RV32-BOTH-NEXT: sw a6, 8(a7) ; RV32-BOTH-NEXT: sw a1, 12(a7) -; RV32-BOTH-NEXT: beqz t0, .LBB0_1 +; RV32-BOTH-NEXT: seqz a7, a2 +; RV32-BOTH-NEXT: add a3, a3, a7 +; RV32-BOTH-NEXT: or a7, a2, a3 +; RV32-BOTH-NEXT: beqz a7, .LBB0_1 ; RV32-BOTH-NEXT: # %bb.2: # %split ; RV32-BOTH-NEXT: ret ; @@ -60,19 +60,18 @@ define void @memset_1_noalign(ptr %a, i128 %value) nounwind { ; RV32-NEXT: sw s2, 20(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s3, 16(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s4, 12(sp) # 4-byte Folded Spill -; RV32-NEXT: sw s5, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: li a2, 0 ; RV32-NEXT: li a3, 0 -; RV32-NEXT: lw a4, 4(a1) -; RV32-NEXT: lw a5, 0(a1) +; RV32-NEXT: lw a4, 0(a1) +; RV32-NEXT: lw a5, 4(a1) ; RV32-NEXT: lw a6, 8(a1) ; RV32-NEXT: lw a1, 12(a1) -; RV32-NEXT: srli a7, a4, 24 -; RV32-NEXT: srli t0, a4, 16 -; RV32-NEXT: srli t1, a4, 8 -; RV32-NEXT: srli t2, a5, 24 -; RV32-NEXT: srli t3, a5, 16 -; RV32-NEXT: srli t4, a5, 8 +; RV32-NEXT: srli a7, a5, 24 +; RV32-NEXT: srli t0, a5, 16 +; RV32-NEXT: srli t1, a5, 8 +; RV32-NEXT: srli t2, a4, 24 +; RV32-NEXT: srli t3, a4, 16 +; RV32-NEXT: srli t4, a4, 8 ; RV32-NEXT: srli t5, a6, 24 ; RV32-NEXT: srli t6, a6, 16 ; RV32-NEXT: srli s0, a6, 8 @@ -84,12 +83,11 @@ define void @memset_1_noalign(ptr %a, i128 %value) nounwind { ; RV32-NEXT: slli s4, a2, 4 ; RV32-NEXT: addi a2, a2, 1 ; RV32-NEXT: add s4, a0, s4 -; RV32-NEXT: seqz s5, a2 -; RV32-NEXT: sb a4, 4(s4) +; RV32-NEXT: sb a5, 4(s4) ; RV32-NEXT: sb t1, 5(s4) ; RV32-NEXT: sb t0, 6(s4) ; RV32-NEXT: sb a7, 7(s4) -; RV32-NEXT: sb a5, 0(s4) +; RV32-NEXT: sb a4, 0(s4) ; RV32-NEXT: sb t4, 1(s4) ; RV32-NEXT: sb t3, 2(s4) ; RV32-NEXT: sb t2, 3(s4) @@ -97,20 +95,20 @@ define void @memset_1_noalign(ptr %a, i128 %value) nounwind { ; RV32-NEXT: sb s0, 9(s4) ; RV32-NEXT: sb t6, 10(s4) ; RV32-NEXT: sb t5, 11(s4) -; RV32-NEXT: add a3, a3, s5 -; RV32-NEXT: or s5, a2, a3 ; RV32-NEXT: sb a1, 12(s4) ; RV32-NEXT: sb s3, 13(s4) ; RV32-NEXT: sb s2, 14(s4) ; RV32-NEXT: sb s1, 15(s4) -; RV32-NEXT: beqz s5, .LBB1_1 +; RV32-NEXT: seqz s4, a2 +; RV32-NEXT: add a3, a3, s4 +; RV32-NEXT: or s4, a2, a3 +; RV32-NEXT: beqz s4, .LBB1_1 ; RV32-NEXT: # %bb.2: # %split ; RV32-NEXT: lw s0, 28(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 24(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s2, 20(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s3, 16(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s4, 12(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s5, 8(sp) # 4-byte Folded Reload ; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: ret ; @@ -165,24 +163,24 @@ define void @memset_1_noalign(ptr %a, i128 %value) nounwind { ; RV32-FAST-LABEL: memset_1_noalign: ; RV32-FAST: # %bb.0: # %loadstoreloop.preheader ; RV32-FAST-NEXT: li a2, 0 -; RV32-FAST-NEXT: lw a3, 0(a1) -; RV32-FAST-NEXT: lw a4, 4(a1) -; RV32-FAST-NEXT: lw a5, 8(a1) +; RV32-FAST-NEXT: li a3, 0 +; RV32-FAST-NEXT: lw a4, 0(a1) +; RV32-FAST-NEXT: lw a5, 4(a1) +; RV32-FAST-NEXT: lw a6, 8(a1) ; 
RV32-FAST-NEXT: lw a1, 12(a1) -; RV32-FAST-NEXT: li a6, 0 ; RV32-FAST-NEXT: .LBB1_1: # %loadstoreloop ; RV32-FAST-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-FAST-NEXT: slli a7, a2, 4 ; RV32-FAST-NEXT: addi a2, a2, 1 ; RV32-FAST-NEXT: add a7, a0, a7 -; RV32-FAST-NEXT: seqz t0, a2 -; RV32-FAST-NEXT: add a6, a6, t0 -; RV32-FAST-NEXT: or t0, a2, a6 -; RV32-FAST-NEXT: sw a3, 0(a7) -; RV32-FAST-NEXT: sw a4, 4(a7) -; RV32-FAST-NEXT: sw a5, 8(a7) +; RV32-FAST-NEXT: sw a4, 0(a7) +; RV32-FAST-NEXT: sw a5, 4(a7) +; RV32-FAST-NEXT: sw a6, 8(a7) ; RV32-FAST-NEXT: sw a1, 12(a7) -; RV32-FAST-NEXT: beqz t0, .LBB1_1 +; RV32-FAST-NEXT: seqz a7, a2 +; RV32-FAST-NEXT: add a3, a3, a7 +; RV32-FAST-NEXT: or a7, a2, a3 +; RV32-FAST-NEXT: beqz a7, .LBB1_1 ; RV32-FAST-NEXT: # %bb.2: # %split ; RV32-FAST-NEXT: ret ; @@ -205,26 +203,26 @@ define void @memset_4(ptr %a, i128 %value) nounwind { ; RV32-BOTH-LABEL: memset_4: ; RV32-BOTH: # %bb.0: # %loadstoreloop.preheader ; RV32-BOTH-NEXT: li a2, 0 -; RV32-BOTH-NEXT: lw a3, 0(a1) -; RV32-BOTH-NEXT: lw a4, 4(a1) -; RV32-BOTH-NEXT: lw a5, 8(a1) +; RV32-BOTH-NEXT: li a3, 0 +; RV32-BOTH-NEXT: lw a4, 0(a1) +; RV32-BOTH-NEXT: lw a5, 4(a1) +; RV32-BOTH-NEXT: lw a6, 8(a1) ; RV32-BOTH-NEXT: lw a1, 12(a1) -; RV32-BOTH-NEXT: li a6, 0 ; RV32-BOTH-NEXT: .LBB2_1: # %loadstoreloop ; RV32-BOTH-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-BOTH-NEXT: slli a7, a2, 4 ; RV32-BOTH-NEXT: addi a2, a2, 1 -; RV32-BOTH-NEXT: seqz t0, a2 -; RV32-BOTH-NEXT: sltiu t1, a2, 4 -; RV32-BOTH-NEXT: add a6, a6, t0 -; RV32-BOTH-NEXT: seqz t0, a6 -; RV32-BOTH-NEXT: and t0, t0, t1 ; RV32-BOTH-NEXT: add a7, a0, a7 -; RV32-BOTH-NEXT: sw a3, 0(a7) -; RV32-BOTH-NEXT: sw a4, 4(a7) -; RV32-BOTH-NEXT: sw a5, 8(a7) +; RV32-BOTH-NEXT: seqz t0, a2 +; RV32-BOTH-NEXT: sw a4, 0(a7) +; RV32-BOTH-NEXT: sw a5, 4(a7) +; RV32-BOTH-NEXT: sw a6, 8(a7) ; RV32-BOTH-NEXT: sw a1, 12(a7) -; RV32-BOTH-NEXT: bnez t0, .LBB2_1 +; RV32-BOTH-NEXT: add a3, a3, t0 +; RV32-BOTH-NEXT: seqz a7, a3 +; RV32-BOTH-NEXT: sltiu t0, a2, 4 +; RV32-BOTH-NEXT: and a7, a7, t0 +; RV32-BOTH-NEXT: bnez a7, .LBB2_1 ; RV32-BOTH-NEXT: # %bb.2: # %split ; RV32-BOTH-NEXT: ret ; @@ -250,28 +248,28 @@ define void @memset_x(ptr %a, i128 %value, i64 %x) nounwind { ; RV32-BOTH-NEXT: beqz a4, .LBB3_5 ; RV32-BOTH-NEXT: # %bb.1: # %loadstoreloop.preheader ; RV32-BOTH-NEXT: li a4, 0 -; RV32-BOTH-NEXT: lw a5, 0(a1) -; RV32-BOTH-NEXT: lw a6, 4(a1) -; RV32-BOTH-NEXT: lw a7, 8(a1) +; RV32-BOTH-NEXT: li a5, 0 +; RV32-BOTH-NEXT: lw a6, 0(a1) +; RV32-BOTH-NEXT: lw a7, 4(a1) +; RV32-BOTH-NEXT: lw t0, 8(a1) ; RV32-BOTH-NEXT: lw a1, 12(a1) -; RV32-BOTH-NEXT: li t0, 0 ; RV32-BOTH-NEXT: j .LBB3_3 ; RV32-BOTH-NEXT: .LBB3_2: # %loadstoreloop ; RV32-BOTH-NEXT: # in Loop: Header=BB3_3 Depth=1 -; RV32-BOTH-NEXT: sltu t1, t0, a3 +; RV32-BOTH-NEXT: sltu t1, a5, a3 ; RV32-BOTH-NEXT: beqz t1, .LBB3_5 ; RV32-BOTH-NEXT: .LBB3_3: # %loadstoreloop ; RV32-BOTH-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32-BOTH-NEXT: slli t1, a4, 4 ; RV32-BOTH-NEXT: addi a4, a4, 1 -; RV32-BOTH-NEXT: seqz t2, a4 -; RV32-BOTH-NEXT: add t0, t0, t2 ; RV32-BOTH-NEXT: add t1, a0, t1 -; RV32-BOTH-NEXT: sw a5, 0(t1) -; RV32-BOTH-NEXT: sw a6, 4(t1) -; RV32-BOTH-NEXT: sw a7, 8(t1) +; RV32-BOTH-NEXT: sw a6, 0(t1) +; RV32-BOTH-NEXT: sw a7, 4(t1) +; RV32-BOTH-NEXT: sw t0, 8(t1) ; RV32-BOTH-NEXT: sw a1, 12(t1) -; RV32-BOTH-NEXT: bne t0, a3, .LBB3_2 +; RV32-BOTH-NEXT: seqz t1, a4 +; RV32-BOTH-NEXT: add a5, a5, t1 +; RV32-BOTH-NEXT: bne a5, a3, .LBB3_2 ; RV32-BOTH-NEXT: # %bb.4: # in Loop: Header=BB3_3 Depth=1 ; 
RV32-BOTH-NEXT: sltu t1, a4, a2 ; RV32-BOTH-NEXT: bnez t1, .LBB3_3 diff --git a/llvm/test/CodeGen/RISCV/mul.ll b/llvm/test/CodeGen/RISCV/mul.ll index 548c7e1c6ea8c..39dca893bd428 100644 --- a/llvm/test/CodeGen/RISCV/mul.ll +++ b/llvm/test/CodeGen/RISCV/mul.ll @@ -1298,34 +1298,34 @@ define i64 @muli64_m3840(i64 %a) nounwind { define i128 @muli128_m3840(i128 %a) nounwind { ; RV32I-LABEL: muli128_m3840: ; RV32I: # %bb.0: +; RV32I-NEXT: lw a6, 0(a1) ; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw a2, 8(a1) -; RV32I-NEXT: lw a5, 0(a1) ; RV32I-NEXT: lw a4, 12(a1) ; RV32I-NEXT: srli a1, a3, 20 -; RV32I-NEXT: slli a6, a2, 12 +; RV32I-NEXT: slli a5, a2, 12 ; RV32I-NEXT: srli a7, a3, 24 ; RV32I-NEXT: slli t0, a2, 8 ; RV32I-NEXT: srli t1, a2, 20 -; RV32I-NEXT: or a1, a6, a1 -; RV32I-NEXT: slli a6, a4, 12 +; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: slli a5, a4, 12 ; RV32I-NEXT: srli t2, a2, 24 ; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: or a2, t0, a7 -; RV32I-NEXT: srli a7, a5, 20 -; RV32I-NEXT: or a6, a6, t1 +; RV32I-NEXT: srli a7, a6, 20 +; RV32I-NEXT: or a5, a5, t1 ; RV32I-NEXT: slli t0, a3, 12 ; RV32I-NEXT: or t1, a4, t2 -; RV32I-NEXT: srli t2, a5, 24 +; RV32I-NEXT: srli t2, a6, 24 ; RV32I-NEXT: slli t3, a3, 8 ; RV32I-NEXT: or a3, t0, a7 -; RV32I-NEXT: slli a4, a5, 12 -; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: slli a4, a6, 12 +; RV32I-NEXT: slli a6, a6, 8 ; RV32I-NEXT: or t0, t3, t2 -; RV32I-NEXT: sltu t2, a2, a1 -; RV32I-NEXT: sub a6, t1, a6 -; RV32I-NEXT: sltu a7, a5, a4 -; RV32I-NEXT: sub a6, a6, t2 +; RV32I-NEXT: sltu a7, a2, a1 +; RV32I-NEXT: sub a5, t1, a5 +; RV32I-NEXT: sub a5, a5, a7 +; RV32I-NEXT: sltu a7, a6, a4 ; RV32I-NEXT: mv t1, a7 ; RV32I-NEXT: beq t0, a3, .LBB36_2 ; RV32I-NEXT: # %bb.1: @@ -1333,15 +1333,15 @@ define i128 @muli128_m3840(i128 %a) nounwind { ; RV32I-NEXT: .LBB36_2: ; RV32I-NEXT: sub a2, a2, a1 ; RV32I-NEXT: sub a1, t0, a3 -; RV32I-NEXT: sub a5, a5, a4 -; RV32I-NEXT: sltu a3, a2, t1 +; RV32I-NEXT: sub a3, a6, a4 +; RV32I-NEXT: sltu a4, a2, t1 ; RV32I-NEXT: sub a2, a2, t1 ; RV32I-NEXT: sub a1, a1, a7 -; RV32I-NEXT: sub a3, a6, a3 -; RV32I-NEXT: sw a5, 0(a0) +; RV32I-NEXT: sub a5, a5, a4 +; RV32I-NEXT: sw a3, 0(a0) ; RV32I-NEXT: sw a1, 4(a0) ; RV32I-NEXT: sw a2, 8(a0) -; RV32I-NEXT: sw a3, 12(a0) +; RV32I-NEXT: sw a5, 12(a0) ; RV32I-NEXT: ret ; ; RV32IM-LABEL: muli128_m3840: diff --git a/llvm/test/CodeGen/RISCV/neg-abs.ll b/llvm/test/CodeGen/RISCV/neg-abs.ll index fe19a4fa8bbd8..a57acf5576cb7 100644 --- a/llvm/test/CodeGen/RISCV/neg-abs.ll +++ b/llvm/test/CodeGen/RISCV/neg-abs.ll @@ -162,35 +162,35 @@ define i32 @neg_abs32_multiuse(i32 %x, ptr %y) { ; RV32I: # %bb.0: ; RV32I-NEXT: srai a2, a0, 31 ; RV32I-NEXT: xor a0, a0, a2 -; RV32I-NEXT: sub a2, a0, a2 -; RV32I-NEXT: neg a0, a2 -; RV32I-NEXT: sw a2, 0(a1) +; RV32I-NEXT: sub a0, a0, a2 +; RV32I-NEXT: sw a0, 0(a1) +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: neg_abs32_multiuse: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: neg a2, a0 -; RV32ZBB-NEXT: max a2, a0, a2 -; RV32ZBB-NEXT: neg a0, a2 -; RV32ZBB-NEXT: sw a2, 0(a1) +; RV32ZBB-NEXT: max a0, a0, a2 +; RV32ZBB-NEXT: sw a0, 0(a1) +; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: neg_abs32_multiuse: ; RV64I: # %bb.0: ; RV64I-NEXT: sraiw a2, a0, 31 ; RV64I-NEXT: xor a0, a0, a2 -; RV64I-NEXT: subw a2, a0, a2 -; RV64I-NEXT: negw a0, a2 -; RV64I-NEXT: sw a2, 0(a1) +; RV64I-NEXT: subw a0, a0, a2 +; RV64I-NEXT: sw a0, 0(a1) +; RV64I-NEXT: negw a0, a0 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: neg_abs32_multiuse: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: sext.w a0, 
a0 ; RV64ZBB-NEXT: negw a2, a0 -; RV64ZBB-NEXT: max a2, a0, a2 -; RV64ZBB-NEXT: negw a0, a2 -; RV64ZBB-NEXT: sw a2, 0(a1) +; RV64ZBB-NEXT: max a0, a0, a2 +; RV64ZBB-NEXT: sw a0, 0(a1) +; RV64ZBB-NEXT: negw a0, a0 ; RV64ZBB-NEXT: ret %abs = tail call i32 @llvm.abs.i32(i32 %x, i1 true) store i32 %abs, ptr %y @@ -208,14 +208,12 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) { ; RV32I-NEXT: sub a1, a1, a3 ; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: .LBB5_2: -; RV32I-NEXT: snez a3, a0 -; RV32I-NEXT: neg a4, a1 -; RV32I-NEXT: sub a3, a4, a3 -; RV32I-NEXT: neg a4, a0 ; RV32I-NEXT: sw a0, 0(a2) ; RV32I-NEXT: sw a1, 4(a2) -; RV32I-NEXT: mv a0, a4 -; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: snez a2, a0 +; RV32I-NEXT: neg a1, a1 +; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: neg a0, a0 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: neg_abs64_multiuse: @@ -227,31 +225,29 @@ define i64 @neg_abs64_multiuse(i64 %x, ptr %y) { ; RV32ZBB-NEXT: sub a1, a1, a3 ; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: .LBB5_2: -; RV32ZBB-NEXT: snez a3, a0 -; RV32ZBB-NEXT: neg a4, a1 -; RV32ZBB-NEXT: sub a3, a4, a3 -; RV32ZBB-NEXT: neg a4, a0 ; RV32ZBB-NEXT: sw a0, 0(a2) ; RV32ZBB-NEXT: sw a1, 4(a2) -; RV32ZBB-NEXT: mv a0, a4 -; RV32ZBB-NEXT: mv a1, a3 +; RV32ZBB-NEXT: snez a2, a0 +; RV32ZBB-NEXT: neg a1, a1 +; RV32ZBB-NEXT: sub a1, a1, a2 +; RV32ZBB-NEXT: neg a0, a0 ; RV32ZBB-NEXT: ret ; ; RV64I-LABEL: neg_abs64_multiuse: ; RV64I: # %bb.0: ; RV64I-NEXT: srai a2, a0, 63 ; RV64I-NEXT: xor a0, a0, a2 -; RV64I-NEXT: sub a2, a0, a2 -; RV64I-NEXT: neg a0, a2 -; RV64I-NEXT: sd a2, 0(a1) +; RV64I-NEXT: sub a0, a0, a2 +; RV64I-NEXT: sd a0, 0(a1) +; RV64I-NEXT: neg a0, a0 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: neg_abs64_multiuse: ; RV64ZBB: # %bb.0: ; RV64ZBB-NEXT: neg a2, a0 -; RV64ZBB-NEXT: max a2, a0, a2 -; RV64ZBB-NEXT: neg a0, a2 -; RV64ZBB-NEXT: sd a2, 0(a1) +; RV64ZBB-NEXT: max a0, a0, a2 +; RV64ZBB-NEXT: sd a0, 0(a1) +; RV64ZBB-NEXT: neg a0, a0 ; RV64ZBB-NEXT: ret %abs = tail call i64 @llvm.abs.i64(i64 %x, i1 true) store i64 %abs, ptr %y diff --git a/llvm/test/CodeGen/RISCV/orc-b-patterns.ll b/llvm/test/CodeGen/RISCV/orc-b-patterns.ll index 5ede992e844f1..ff9d7a009fc29 100644 --- a/llvm/test/CodeGen/RISCV/orc-b-patterns.ll +++ b/llvm/test/CodeGen/RISCV/orc-b-patterns.ll @@ -233,9 +233,9 @@ define i32 @orc_b_i32_sub_shl8x_x_b1_shl_used(i32 %x, ptr %arr) { ; RV32I-NEXT: addi a2, a2, 514 ; RV32I-NEXT: and a0, a0, a2 ; RV32I-NEXT: slli a2, a0, 7 -; RV32I-NEXT: srli a3, a0, 1 -; RV32I-NEXT: sub a0, a2, a3 -; RV32I-NEXT: sw a3, 0(a1) +; RV32I-NEXT: srli a0, a0, 1 +; RV32I-NEXT: sw a0, 0(a1) +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_b1_shl_used: @@ -244,8 +244,8 @@ define i32 @orc_b_i32_sub_shl8x_x_b1_shl_used(i32 %x, ptr %arr) { ; RV32ZBB-NEXT: addi a2, a2, 514 ; RV32ZBB-NEXT: and a0, a0, a2 ; RV32ZBB-NEXT: srli a2, a0, 1 -; RV32ZBB-NEXT: orc.b a0, a0 ; RV32ZBB-NEXT: sw a2, 0(a1) +; RV32ZBB-NEXT: orc.b a0, a0 ; RV32ZBB-NEXT: ret entry: %and = and i32 %x, 33686018 @@ -264,8 +264,8 @@ define i32 @orc_b_i32_sub_shl8x_x_b1_srl_used(i32 %x, ptr %arr) { ; RV32I-NEXT: and a0, a0, a2 ; RV32I-NEXT: slli a2, a0, 7 ; RV32I-NEXT: srli a0, a0, 1 -; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: sw a2, 0(a1) +; RV32I-NEXT: sub a0, a2, a0 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: orc_b_i32_sub_shl8x_x_b1_srl_used: @@ -274,8 +274,8 @@ define i32 @orc_b_i32_sub_shl8x_x_b1_srl_used(i32 %x, ptr %arr) { ; RV32ZBB-NEXT: addi a2, a2, 514 ; RV32ZBB-NEXT: and a0, a0, a2 ; RV32ZBB-NEXT: slli a2, a0, 7 -; RV32ZBB-NEXT: orc.b a0, a0 ; 
RV32ZBB-NEXT: sw a2, 0(a1) +; RV32ZBB-NEXT: orc.b a0, a0 ; RV32ZBB-NEXT: ret entry: %and = and i32 %x, 33686018 @@ -320,8 +320,8 @@ define i32 @orc_b_i32_sub_shl8x_x_shl_used(i32 %x, ptr %arr){ ; CHECK-NEXT: addi a2, a2, 257 ; CHECK-NEXT: and a0, a0, a2 ; CHECK-NEXT: slli a2, a0, 8 -; CHECK-NEXT: sub a0, a2, a0 ; CHECK-NEXT: sw a2, 0(a1) +; CHECK-NEXT: sub a0, a2, a0 ; CHECK-NEXT: ret entry: %and = and i32 %x, 16843009 @@ -338,10 +338,10 @@ define i32 @orc_b_i32_sub_shl8x_x_b1_both_used(i32 %x, ptr %arr) { ; CHECK-NEXT: addi a2, a2, 514 ; CHECK-NEXT: and a0, a0, a2 ; CHECK-NEXT: slli a2, a0, 7 -; CHECK-NEXT: srli a3, a0, 1 -; CHECK-NEXT: sub a0, a2, a3 +; CHECK-NEXT: srli a0, a0, 1 ; CHECK-NEXT: sw a2, 0(a1) -; CHECK-NEXT: sw a3, 4(a1) +; CHECK-NEXT: sw a0, 4(a1) +; CHECK-NEXT: sub a0, a2, a0 ; CHECK-NEXT: ret entry: %and = and i32 %x, 33686018 diff --git a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll index 5a01d43fea56b..48ba11b260bda 100644 --- a/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll +++ b/llvm/test/CodeGen/RISCV/overflow-intrinsics.ll @@ -373,12 +373,12 @@ define i64 @uaddo6_xor(i64 %a, i64 %b) { ; ; RV64-LABEL: uaddo6_xor: ; RV64: # %bb.0: -; RV64-NEXT: not a2, a0 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bltu a2, a1, .LBB8_2 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: bltu a0, a1, .LBB8_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 42 +; RV64-NEXT: li a1, 42 ; RV64-NEXT: .LBB8_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %x = xor i64 %a, -1 %cmp = icmp ult i64 %x, %b @@ -409,12 +409,12 @@ define i64 @uaddo6_xor_commuted(i64 %a, i64 %b) { ; ; RV64-LABEL: uaddo6_xor_commuted: ; RV64: # %bb.0: -; RV64-NEXT: not a2, a0 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bltu a2, a1, .LBB9_2 +; RV64-NEXT: not a0, a0 +; RV64-NEXT: bltu a0, a1, .LBB9_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 42 +; RV64-NEXT: li a1, 42 ; RV64-NEXT: .LBB9_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %x = xor i64 %a, -1 %cmp = icmp ult i64 %x, %b @@ -436,8 +436,8 @@ define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) { ; RV32-NEXT: .cfi_offset s0, -8 ; RV32-NEXT: .cfi_offset s1, -12 ; RV32-NEXT: mv s0, a2 -; RV32-NEXT: not a1, a1 ; RV32-NEXT: not a0, a0 +; RV32-NEXT: not a1, a1 ; RV32-NEXT: beq a1, a3, .LBB10_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: sltu a2, a1, a3 @@ -472,8 +472,8 @@ define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) { ; RV64-NEXT: sd s0, 0(sp) # 8-byte Folded Spill ; RV64-NEXT: .cfi_offset ra, -8 ; RV64-NEXT: .cfi_offset s0, -16 -; RV64-NEXT: not a0, a0 ; RV64-NEXT: mv s0, a1 +; RV64-NEXT: not a0, a0 ; RV64-NEXT: bltu a0, a1, .LBB10_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li s0, 42 @@ -499,17 +499,17 @@ define i64 @uaddo6_xor_multi_use(i64 %a, i64 %b) { define i1 @uaddo6_xor_op_after_XOR(i32 %a, ptr %b.ptr) { ; RV32-LABEL: uaddo6_xor_op_after_XOR: ; RV32: # %bb.0: -; RV32-NEXT: lw a1, 0(a1) ; RV32-NEXT: not a0, a0 +; RV32-NEXT: lw a1, 0(a1) ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: xori a0, a0, 1 ; RV32-NEXT: ret ; ; RV64-LABEL: uaddo6_xor_op_after_XOR: ; RV64: # %bb.0: -; RV64-NEXT: lw a1, 0(a1) ; RV64-NEXT: not a0, a0 ; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: lw a1, 0(a1) ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: xori a0, a0, 1 ; RV64-NEXT: ret @@ -811,8 +811,8 @@ define i1 @usubo_ult_i64_math_overflow_used(i64 %x, i64 %y, ptr %p) { ; RV64-LABEL: usubo_ult_i64_math_overflow_used: ; RV64: # %bb.0: ; RV64-NEXT: sub a3, a0, a1 -; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: sd a3, 0(a2) +; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: ret %s = sub i64 %x, %y store i64 %s, ptr %p 
@@ -1080,33 +1080,33 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) { ; RV32-NEXT: .cfi_offset s5, -28 ; RV32-NEXT: .cfi_offset s6, -32 ; RV32-NEXT: mv s5, a5 -; RV32-NEXT: mv s3, a1 -; RV32-NEXT: andi a1, a5, 1 -; RV32-NEXT: beqz a1, .LBB32_8 +; RV32-NEXT: mv s3, a0 +; RV32-NEXT: andi a0, a5, 1 +; RV32-NEXT: beqz a0, .LBB32_8 ; RV32-NEXT: # %bb.1: # %t ; RV32-NEXT: mv s0, a4 ; RV32-NEXT: mv s2, a3 ; RV32-NEXT: mv s1, a2 -; RV32-NEXT: mv s4, a0 -; RV32-NEXT: beq s3, a3, .LBB32_3 +; RV32-NEXT: mv s4, a1 +; RV32-NEXT: beq a1, a3, .LBB32_3 ; RV32-NEXT: # %bb.2: # %t -; RV32-NEXT: sltu s6, s3, s2 +; RV32-NEXT: sltu s6, s4, s2 ; RV32-NEXT: j .LBB32_4 ; RV32-NEXT: .LBB32_3: -; RV32-NEXT: sltu s6, s4, s1 +; RV32-NEXT: sltu s6, s3, s1 ; RV32-NEXT: .LBB32_4: # %t ; RV32-NEXT: mv a0, s6 ; RV32-NEXT: call call ; RV32-NEXT: beqz s6, .LBB32_8 ; RV32-NEXT: # %bb.5: # %end -; RV32-NEXT: sltu a1, s4, s1 +; RV32-NEXT: sltu a1, s3, s1 ; RV32-NEXT: mv a0, a1 -; RV32-NEXT: beq s3, s2, .LBB32_7 +; RV32-NEXT: beq s4, s2, .LBB32_7 ; RV32-NEXT: # %bb.6: # %end -; RV32-NEXT: sltu a0, s3, s2 +; RV32-NEXT: sltu a0, s4, s2 ; RV32-NEXT: .LBB32_7: # %end -; RV32-NEXT: sub a2, s3, s2 -; RV32-NEXT: sub a3, s4, s1 +; RV32-NEXT: sub a2, s4, s2 +; RV32-NEXT: sub a3, s3, s1 ; RV32-NEXT: sub a2, a2, a1 ; RV32-NEXT: sw a3, 0(s0) ; RV32-NEXT: sw a2, 4(s0) @@ -1151,13 +1151,13 @@ define i1 @usubo_ult_cmp_dominates_i64(i64 %x, i64 %y, ptr %p, i1 %cond) { ; RV64-NEXT: .cfi_offset s3, -40 ; RV64-NEXT: .cfi_offset s4, -48 ; RV64-NEXT: mv s0, a3 -; RV64-NEXT: mv s2, a1 -; RV64-NEXT: andi a1, a3, 1 -; RV64-NEXT: beqz a1, .LBB32_3 +; RV64-NEXT: mv s3, a0 +; RV64-NEXT: andi a0, a3, 1 +; RV64-NEXT: beqz a0, .LBB32_3 ; RV64-NEXT: # %bb.1: # %t ; RV64-NEXT: mv s1, a2 -; RV64-NEXT: mv s3, a0 -; RV64-NEXT: sltu s4, a0, s2 +; RV64-NEXT: mv s2, a1 +; RV64-NEXT: sltu s4, s3, a1 ; RV64-NEXT: mv a0, s4 ; RV64-NEXT: call call ; RV64-NEXT: bgeu s3, s2, .LBB32_3 diff --git a/llvm/test/CodeGen/RISCV/pr51206.ll b/llvm/test/CodeGen/RISCV/pr51206.ll index 8e858bdd29762..ccb57c442fbfa 100644 --- a/llvm/test/CodeGen/RISCV/pr51206.ll +++ b/llvm/test/CodeGen/RISCV/pr51206.ll @@ -13,21 +13,21 @@ define signext i32 @wobble() nounwind { ; CHECK: # %bb.0: # %bb ; CHECK-NEXT: lui a0, %hi(global) ; CHECK-NEXT: lui a1, %hi(global.1) -; CHECK-NEXT: lbu a0, %lo(global)(a0) ; CHECK-NEXT: lui a2, %hi(global.2) -; CHECK-NEXT: lui a3, 52429 -; CHECK-NEXT: lbu a2, %lo(global.2)(a2) +; CHECK-NEXT: lbu a0, %lo(global)(a0) ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: sw a0, %lo(global.1)(a1) -; CHECK-NEXT: lui a1, %hi(global.3) -; CHECK-NEXT: slli a3, a3, 4 +; CHECK-NEXT: lui a1, 52429 +; CHECK-NEXT: lbu a2, %lo(global.2)(a2) +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: mul a0, a0, a2 ; CHECK-NEXT: slli a2, a0, 48 -; CHECK-NEXT: mulhu a2, a2, a3 -; CHECK-NEXT: srli a2, a2, 18 -; CHECK-NEXT: li a3, 5 -; CHECK-NEXT: sw a2, %lo(global.3)(a1) -; CHECK-NEXT: bgeu a0, a3, .LBB0_2 +; CHECK-NEXT: mulhu a1, a2, a1 +; CHECK-NEXT: lui a2, %hi(global.3) +; CHECK-NEXT: srli a1, a1, 18 +; CHECK-NEXT: sw a1, %lo(global.3)(a2) +; CHECK-NEXT: li a1, 5 +; CHECK-NEXT: bgeu a0, a1, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %bb12 ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/pr58511.ll b/llvm/test/CodeGen/RISCV/pr58511.ll index e5cba679729fa..c06a5b1cf11fa 100644 --- a/llvm/test/CodeGen/RISCV/pr58511.ll +++ b/llvm/test/CodeGen/RISCV/pr58511.ll @@ -47,8 +47,8 @@ define i32 @h(i1 %0, i32 %1, ptr %2) { ; CHECK-NEXT: addiw a3, a3, -2047 ; CHECK-NEXT: 
srai a0, a0, 63 ; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: and a0, a0, a3 ; CHECK-NEXT: sw a1, 0(a2) +; CHECK-NEXT: and a0, a0, a3 ; CHECK-NEXT: ret BB: %I = select i1 %0, i32 -1, i32 0 @@ -66,8 +66,8 @@ define i32 @i(i1 %0, i32 %1, ptr %2) { ; CHECK-NEXT: addiw a3, a3, -2047 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: and a0, a0, a3 ; CHECK-NEXT: sw a1, 0(a2) +; CHECK-NEXT: and a0, a0, a3 ; CHECK-NEXT: ret BB: %I = select i1 %0, i32 0, i32 -1 diff --git a/llvm/test/CodeGen/RISCV/pr63816.ll b/llvm/test/CodeGen/RISCV/pr63816.ll index 75ddeda3de507..5632e8ec16224 100644 --- a/llvm/test/CodeGen/RISCV/pr63816.ll +++ b/llvm/test/CodeGen/RISCV/pr63816.ll @@ -47,12 +47,12 @@ define void @test(ptr %0, ptr %1) nounwind { ; CHECK-NEXT: fcvt.d.s fs6, fa0 ; CHECK-NEXT: fcvt.d.s fs5, fs5 ; CHECK-NEXT: fcvt.d.s fs4, fs4 -; CHECK-NEXT: lhu a0, 14(s1) ; CHECK-NEXT: fcvt.d.s fs3, fs3 ; CHECK-NEXT: fcvt.d.s fs2, fs2 ; CHECK-NEXT: fcvt.d.s fs1, fs1 -; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: fcvt.d.s fs0, fs0 +; CHECK-NEXT: lhu a0, 14(s1) +; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: fcvt.d.s fa5, fa0 ; CHECK-NEXT: fsd fs2, 32(s0) diff --git a/llvm/test/CodeGen/RISCV/pr69586.ll b/llvm/test/CodeGen/RISCV/pr69586.ll index 9fc9a3c42867e..55c198aeb98b0 100644 --- a/llvm/test/CodeGen/RISCV/pr69586.ll +++ b/llvm/test/CodeGen/RISCV/pr69586.ll @@ -7,21 +7,21 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; NOREMAT-LABEL: test: ; NOREMAT: # %bb.0: -; NOREMAT-NEXT: addi sp, sp, -752 -; NOREMAT-NEXT: .cfi_def_cfa_offset 752 -; NOREMAT-NEXT: sd ra, 744(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sd s0, 736(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sd s1, 728(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sd s2, 720(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sd s3, 712(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sd s4, 704(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sd s5, 696(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sd s6, 688(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sd s7, 680(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sd s8, 672(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sd s9, 664(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sd s10, 656(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sd s11, 648(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: addi sp, sp, -720 +; NOREMAT-NEXT: .cfi_def_cfa_offset 720 +; NOREMAT-NEXT: sd ra, 712(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s0, 704(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s1, 696(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s2, 688(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s3, 680(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s4, 672(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s5, 664(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s6, 656(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s7, 648(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s8, 640(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s9, 632(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s10, 624(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd s11, 616(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: .cfi_offset ra, -8 ; NOREMAT-NEXT: .cfi_offset s0, -16 ; NOREMAT-NEXT: .cfi_offset s1, -24 @@ -35,608 +35,597 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; NOREMAT-NEXT: .cfi_offset s9, -88 ; NOREMAT-NEXT: .cfi_offset s10, -96 ; NOREMAT-NEXT: .cfi_offset s11, -104 -; NOREMAT-NEXT: csrr a2, vlenb -; NOREMAT-NEXT: slli a2, a2, 1 -; NOREMAT-NEXT: sub sp, sp, a2 -; NOREMAT-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xf0, 0x05, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 
0x00, 0x1e, 0x22 # sp + 752 + 2 * vlenb ; NOREMAT-NEXT: mv a7, a0 ; NOREMAT-NEXT: li a0, 32 -; NOREMAT-NEXT: addi a5, a7, 512 +; NOREMAT-NEXT: addi a6, a7, 512 ; NOREMAT-NEXT: addi a4, a7, 1024 -; NOREMAT-NEXT: addi a6, a7, 1536 -; NOREMAT-NEXT: li t4, 1 -; NOREMAT-NEXT: li a2, 5 +; NOREMAT-NEXT: addi a5, a7, 1536 +; NOREMAT-NEXT: li t0, 1 +; NOREMAT-NEXT: li a3, 5 ; NOREMAT-NEXT: li t1, 3 -; NOREMAT-NEXT: li t0, 7 -; NOREMAT-NEXT: lui t5, 1 -; NOREMAT-NEXT: li s4, 9 -; NOREMAT-NEXT: li s6, 11 -; NOREMAT-NEXT: li s9, 13 -; NOREMAT-NEXT: li ra, 15 -; NOREMAT-NEXT: lui t2, 2 -; NOREMAT-NEXT: lui s1, 3 -; NOREMAT-NEXT: lui t3, 4 -; NOREMAT-NEXT: lui s0, 5 -; NOREMAT-NEXT: lui s3, 6 -; NOREMAT-NEXT: lui s7, 7 -; NOREMAT-NEXT: vsetvli zero, a0, e32, m2, ta, ma -; NOREMAT-NEXT: slli t4, t4, 11 -; NOREMAT-NEXT: sd t4, 512(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: slli a3, a2, 9 -; NOREMAT-NEXT: sd a3, 504(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: slli t6, t1, 10 -; NOREMAT-NEXT: slli s2, t0, 9 -; NOREMAT-NEXT: add a0, a7, t5 -; NOREMAT-NEXT: lui s11, 1 -; NOREMAT-NEXT: slli s4, s4, 9 -; NOREMAT-NEXT: slli s5, a2, 10 -; NOREMAT-NEXT: slli s6, s6, 9 -; NOREMAT-NEXT: slli s8, t1, 11 -; NOREMAT-NEXT: vle32.v v8, (a5) -; NOREMAT-NEXT: slli s9, s9, 9 +; NOREMAT-NEXT: li a2, 7 +; NOREMAT-NEXT: lui t4, 1 +; NOREMAT-NEXT: li s8, 9 +; NOREMAT-NEXT: li s10, 11 ; NOREMAT-NEXT: li t5, 13 -; NOREMAT-NEXT: vle32.v v10, (a4) -; NOREMAT-NEXT: vle32.v v2, (a4) -; NOREMAT-NEXT: slli s10, t0, 10 -; NOREMAT-NEXT: vle32.v v0, (a6) -; NOREMAT-NEXT: vle32.v v12, (a6) -; NOREMAT-NEXT: slli ra, ra, 9 +; NOREMAT-NEXT: lui s1, 2 +; NOREMAT-NEXT: lui t3, 3 +; NOREMAT-NEXT: lui s3, 4 +; NOREMAT-NEXT: lui s11, 5 +; NOREMAT-NEXT: lui t2, 6 +; NOREMAT-NEXT: lui t6, 7 +; NOREMAT-NEXT: lui s5, 8 +; NOREMAT-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; NOREMAT-NEXT: slli t0, t0, 11 +; NOREMAT-NEXT: slli s0, a3, 9 +; NOREMAT-NEXT: slli s4, t1, 10 +; NOREMAT-NEXT: slli s6, a2, 9 +; NOREMAT-NEXT: add a0, a7, t4 +; NOREMAT-NEXT: slli s8, s8, 9 +; NOREMAT-NEXT: slli s9, a3, 10 +; NOREMAT-NEXT: vle32.v v10, (a6) +; NOREMAT-NEXT: slli s10, s10, 9 +; NOREMAT-NEXT: slli ra, t1, 11 +; NOREMAT-NEXT: vle32.v v14, (a4) +; NOREMAT-NEXT: vle32.v v8, (a4) +; NOREMAT-NEXT: slli t5, t5, 9 +; NOREMAT-NEXT: vle32.v v0, (a5) +; NOREMAT-NEXT: vle32.v v12, (a5) +; NOREMAT-NEXT: add a4, a7, s1 +; NOREMAT-NEXT: vle32.v v2, (a0) ; NOREMAT-NEXT: vle32.v v4, (a0) -; NOREMAT-NEXT: vle32.v v20, (a0) -; NOREMAT-NEXT: add a4, a7, t2 +; NOREMAT-NEXT: add a5, a7, t3 ; NOREMAT-NEXT: vle32.v v6, (a4) ; NOREMAT-NEXT: vle32.v v30, (a4) -; NOREMAT-NEXT: add a4, a7, s1 -; NOREMAT-NEXT: vle32.v v28, (a4) -; NOREMAT-NEXT: vle32.v v26, (a4) -; NOREMAT-NEXT: add a4, a7, t3 +; NOREMAT-NEXT: add a4, a7, s3 +; NOREMAT-NEXT: vle32.v v28, (a5) +; NOREMAT-NEXT: vle32.v v26, (a5) +; NOREMAT-NEXT: add a5, a7, s11 ; NOREMAT-NEXT: vle32.v v24, (a4) ; NOREMAT-NEXT: vle32.v v22, (a4) -; NOREMAT-NEXT: add a4, a7, s0 -; NOREMAT-NEXT: vle32.v v14, (a7) -; NOREMAT-NEXT: vle32.v v18, (a4) +; NOREMAT-NEXT: add a4, a7, t2 +; NOREMAT-NEXT: vle32.v v20, (a5) +; NOREMAT-NEXT: vle32.v v18, (a5) +; NOREMAT-NEXT: add a5, a7, t6 +; NOREMAT-NEXT: vle32.v v16, (a7) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v10 ; NOREMAT-NEXT: vle32.v v16, (a4) -; NOREMAT-NEXT: add a4, a7, s3 -; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v8 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v14 ; NOREMAT-NEXT: vle32.v v14, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v10 -; NOREMAT-NEXT: vle32.v v8, (a4) -; NOREMAT-NEXT: addi a0, sp, 640 -; 
NOREMAT-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill -; NOREMAT-NEXT: add a4, a7, t4 -; NOREMAT-NEXT: vle32.v v10, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v0 -; NOREMAT-NEXT: vle32.v v2, (a4) -; NOREMAT-NEXT: add a4, a7, a3 -; NOREMAT-NEXT: vle32.v v0, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v10 -; NOREMAT-NEXT: vle32.v v10, (a4) -; NOREMAT-NEXT: add a4, a7, t6 -; NOREMAT-NEXT: vle32.v v12, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v0 -; NOREMAT-NEXT: vle32.v v2, (a4) -; NOREMAT-NEXT: add a4, a7, s2 -; NOREMAT-NEXT: vle32.v v8, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v12 -; NOREMAT-NEXT: vle32.v v12, (a4) -; NOREMAT-NEXT: add a4, a7, s7 -; NOREMAT-NEXT: vle32.v v0, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v8 -; NOREMAT-NEXT: vle32.v v10, (a4) -; NOREMAT-NEXT: add a4, a7, s4 -; NOREMAT-NEXT: vle32.v v8, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v4 -; NOREMAT-NEXT: vle32.v v12, (a4) ; NOREMAT-NEXT: add a4, a7, s5 -; NOREMAT-NEXT: vle32.v v4, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v20, v8 -; NOREMAT-NEXT: vle32.v v8, (a4) -; NOREMAT-NEXT: add a4, a7, s6 -; NOREMAT-NEXT: vle32.v v20, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v4 +; NOREMAT-NEXT: vle32.v v10, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v0 +; NOREMAT-NEXT: vle32.v v8, (a5) +; NOREMAT-NEXT: add a5, a7, t0 +; NOREMAT-NEXT: mv t3, t0 +; NOREMAT-NEXT: vle32.v v0, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v0 +; NOREMAT-NEXT: vle32.v v12, (a5) +; NOREMAT-NEXT: add a5, a7, s0 +; NOREMAT-NEXT: vle32.v v0, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v0 +; NOREMAT-NEXT: vle32.v v12, (a5) +; NOREMAT-NEXT: add a5, a7, s4 +; NOREMAT-NEXT: vle32.v v0, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v0 +; NOREMAT-NEXT: vle32.v v12, (a5) +; NOREMAT-NEXT: add a5, a7, s6 +; NOREMAT-NEXT: vle32.v v0, (a5) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v0 ; NOREMAT-NEXT: vle32.v v12, (a4) +; NOREMAT-NEXT: slli a6, a2, 10 +; NOREMAT-NEXT: sd a6, 608(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: vle32.v v0, (a5) ; NOREMAT-NEXT: add a4, a7, s8 -; NOREMAT-NEXT: vle32.v v4, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v20 -; NOREMAT-NEXT: vle32.v v8, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v2 +; NOREMAT-NEXT: vle32.v v2, (a4) +; NOREMAT-NEXT: vle32.v v0, (a4) ; NOREMAT-NEXT: add a4, a7, s9 -; NOREMAT-NEXT: vle32.v v20, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v4 -; NOREMAT-NEXT: vle32.v v12, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v2 +; NOREMAT-NEXT: vle32.v v4, (a4) +; NOREMAT-NEXT: vle32.v v2, (a4) ; NOREMAT-NEXT: add a4, a7, s10 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v4 ; NOREMAT-NEXT: vle32.v v4, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v20 -; NOREMAT-NEXT: vle32.v v8, (a4) +; NOREMAT-NEXT: vle32.v v0, (a4) ; NOREMAT-NEXT: add a4, a7, ra +; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v4 +; NOREMAT-NEXT: vle32.v v4, (a4) ; NOREMAT-NEXT: vle32.v v2, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v4 -; NOREMAT-NEXT: lui t4, 8 -; NOREMAT-NEXT: add a5, a7, t4 -; NOREMAT-NEXT: vle32.v v20, (a5) -; NOREMAT-NEXT: vle32.v v12, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v2 +; NOREMAT-NEXT: add a4, a7, t5 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v4 +; NOREMAT-NEXT: vle32.v v4, (a4) +; NOREMAT-NEXT: vle32.v v0, (a4) +; NOREMAT-NEXT: add a4, a7, a6 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v4 +; NOREMAT-NEXT: vle32.v v4, (a4) +; NOREMAT-NEXT: vle32.v v2, (a4) +; NOREMAT-NEXT: li a5, 15 +; NOREMAT-NEXT: slli a4, a5, 9 +; NOREMAT-NEXT: sd a4, 600(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v4 +; NOREMAT-NEXT: add a4, a7, a4 +; NOREMAT-NEXT: vle32.v v4, (a4) +; NOREMAT-NEXT: vle32.v v0, (a4) +; 
NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v4 ; NOREMAT-NEXT: li a4, 17 ; NOREMAT-NEXT: slli a4, a4, 9 -; NOREMAT-NEXT: li s1, 17 -; NOREMAT-NEXT: sd a4, 624(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: li t4, 17 +; NOREMAT-NEXT: sd a4, 592(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a4, a7, a4 -; NOREMAT-NEXT: vle32.v v8, (a4) ; NOREMAT-NEXT: vle32.v v4, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v6 -; NOREMAT-NEXT: li a5, 9 -; NOREMAT-NEXT: slli a4, a5, 10 -; NOREMAT-NEXT: sd a4, 616(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: vle32.v v2, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v6 +; NOREMAT-NEXT: li t1, 9 +; NOREMAT-NEXT: slli a4, t1, 10 +; NOREMAT-NEXT: sd a4, 584(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a4, a7, a4 -; NOREMAT-NEXT: vle32.v v12, (a4) ; NOREMAT-NEXT: vle32.v v6, (a4) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v8 +; NOREMAT-NEXT: vle32.v v0, (a4) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v4 ; NOREMAT-NEXT: li a4, 19 ; NOREMAT-NEXT: slli a4, a4, 9 -; NOREMAT-NEXT: li t2, 19 -; NOREMAT-NEXT: sd a4, 608(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: li s1, 19 +; NOREMAT-NEXT: sd a4, 576(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a4, a7, a4 -; NOREMAT-NEXT: vle32.v v8, (a4) ; NOREMAT-NEXT: vle32.v v30, (a4) -; NOREMAT-NEXT: slli a3, a2, 11 -; NOREMAT-NEXT: sd a3, 600(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v12 +; NOREMAT-NEXT: vle32.v v4, (a4) +; NOREMAT-NEXT: slli a3, a3, 11 +; NOREMAT-NEXT: sd a3, 568(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v6 ; NOREMAT-NEXT: add a3, a7, a3 -; NOREMAT-NEXT: vle32.v v12, (a3) -; NOREMAT-NEXT: vle32.v v4, (a3) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v8 +; NOREMAT-NEXT: vle32.v v6, (a3) +; NOREMAT-NEXT: vle32.v v2, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v30 ; NOREMAT-NEXT: li s7, 21 ; NOREMAT-NEXT: slli a3, s7, 9 -; NOREMAT-NEXT: sd a3, 592(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: add a3, a7, a3 -; NOREMAT-NEXT: vle32.v v8, (a3) -; NOREMAT-NEXT: vle32.v v6, (a3) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v12 -; NOREMAT-NEXT: li a6, 11 -; NOREMAT-NEXT: slli a3, a6, 10 -; NOREMAT-NEXT: sd a3, 584(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd a3, 560(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a3, a7, a3 -; NOREMAT-NEXT: vle32.v v12, (a3) ; NOREMAT-NEXT: vle32.v v30, (a3) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v8 -; NOREMAT-NEXT: li s3, 23 -; NOREMAT-NEXT: slli a3, s3, 9 -; NOREMAT-NEXT: sd a3, 576(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: vle32.v v0, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v6 +; NOREMAT-NEXT: li a4, 11 +; NOREMAT-NEXT: slli a3, a4, 10 +; NOREMAT-NEXT: sd a3, 552(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a3, a7, a3 -; NOREMAT-NEXT: vle32.v v8, (a3) +; NOREMAT-NEXT: vle32.v v6, (a3) ; NOREMAT-NEXT: vle32.v v4, (a3) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v12 -; NOREMAT-NEXT: li s0, 25 -; NOREMAT-NEXT: slli a3, s0, 9 -; NOREMAT-NEXT: sd a3, 568(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v30 +; NOREMAT-NEXT: li s2, 23 +; NOREMAT-NEXT: slli a3, s2, 9 +; NOREMAT-NEXT: sd a3, 544(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a3, a7, a3 +; NOREMAT-NEXT: vle32.v v30, (a3) +; NOREMAT-NEXT: vle32.v v2, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v6 +; NOREMAT-NEXT: li t6, 25 +; NOREMAT-NEXT: slli a3, t6, 9 +; NOREMAT-NEXT: sd a3, 536(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a3, a7, a3 -; NOREMAT-NEXT: vle32.v v12, (a3) ; NOREMAT-NEXT: vle32.v v6, (a3) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v8 -; NOREMAT-NEXT: slli a3, t5, 10 -; NOREMAT-NEXT: sd a3, 560(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: 
vle32.v v0, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v30 +; NOREMAT-NEXT: li a6, 13 +; NOREMAT-NEXT: slli a3, a6, 10 +; NOREMAT-NEXT: sd a3, 528(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a3, a7, a3 -; NOREMAT-NEXT: vle32.v v8, (a3) ; NOREMAT-NEXT: vle32.v v30, (a3) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v28 -; NOREMAT-NEXT: li t3, 27 -; NOREMAT-NEXT: slli a3, t3, 9 -; NOREMAT-NEXT: sd a3, 552(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: vle32.v v4, (a3) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v28 +; NOREMAT-NEXT: li t2, 27 +; NOREMAT-NEXT: slli a3, t2, 9 +; NOREMAT-NEXT: sd a3, 520(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a3, a7, a3 ; NOREMAT-NEXT: vle32.v v28, (a3) -; NOREMAT-NEXT: vle32.v v4, (a3) -; NOREMAT-NEXT: slli a2, t0, 11 -; NOREMAT-NEXT: sd a2, 544(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sf.vc.vv 3, 0, v26, v12 +; NOREMAT-NEXT: vle32.v v2, (a3) +; NOREMAT-NEXT: slli a2, a2, 11 +; NOREMAT-NEXT: sd a2, 512(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sf.vc.vv 3, 0, v26, v6 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v12, (a2) ; NOREMAT-NEXT: vle32.v v26, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v8 -; NOREMAT-NEXT: li t0, 29 -; NOREMAT-NEXT: slli a2, t0, 9 -; NOREMAT-NEXT: sd a2, 536(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v8, (a2) ; NOREMAT-NEXT: vle32.v v6, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v28 -; NOREMAT-NEXT: li a3, 15 -; NOREMAT-NEXT: slli a2, a3, 10 -; NOREMAT-NEXT: sd a2, 528(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v30 +; NOREMAT-NEXT: li a3, 29 +; NOREMAT-NEXT: slli a2, a3, 9 +; NOREMAT-NEXT: sd a2, 504(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v28, (a2) ; NOREMAT-NEXT: vle32.v v30, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v12 -; NOREMAT-NEXT: li t1, 31 -; NOREMAT-NEXT: slli a2, t1, 9 -; NOREMAT-NEXT: sd a2, 520(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: vle32.v v0, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v28 +; NOREMAT-NEXT: slli a2, a5, 10 +; NOREMAT-NEXT: sd a2, 496(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: li t0, 15 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v12, (a2) +; NOREMAT-NEXT: vle32.v v28, (a2) ; NOREMAT-NEXT: vle32.v v4, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v26, v8 -; NOREMAT-NEXT: lui a4, 4 -; NOREMAT-NEXT: addiw a0, a4, 512 -; NOREMAT-NEXT: sd a0, 496(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v26 +; NOREMAT-NEXT: li a5, 31 +; NOREMAT-NEXT: slli a0, a5, 9 +; NOREMAT-NEXT: sd a0, 488(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a0, a7, a0 -; NOREMAT-NEXT: vle32.v v8, (a0) ; NOREMAT-NEXT: vle32.v v26, (a0) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v28 -; NOREMAT-NEXT: slli a2, s1, 10 -; NOREMAT-NEXT: sd a2, 488(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v28, (a2) -; NOREMAT-NEXT: vle32.v v6, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v12 -; NOREMAT-NEXT: addiw a2, a4, 1536 +; NOREMAT-NEXT: vle32.v v2, (a0) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v30 +; NOREMAT-NEXT: addiw a2, s3, 512 ; NOREMAT-NEXT: sd a2, 480(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v12, (a2) ; NOREMAT-NEXT: vle32.v v30, (a2) -; NOREMAT-NEXT: slli a2, a5, 11 +; NOREMAT-NEXT: vle32.v v6, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v28 +; NOREMAT-NEXT: slli a2, t4, 10 ; NOREMAT-NEXT: sd a2, 472(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v24 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v24, (a2) -; NOREMAT-NEXT: vle32.v v4, (a2) -; 
NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v8 -; NOREMAT-NEXT: lui a5, 5 -; NOREMAT-NEXT: addiw a2, a5, -1536 +; NOREMAT-NEXT: vle32.v v28, (a2) +; NOREMAT-NEXT: vle32.v v0, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v26 +; NOREMAT-NEXT: addiw a2, s3, 1536 ; NOREMAT-NEXT: sd a2, 464(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v8, (a2) -; NOREMAT-NEXT: vle32.v v22, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v26, v28 -; NOREMAT-NEXT: slli a2, t2, 10 +; NOREMAT-NEXT: vle32.v v26, (a2) +; NOREMAT-NEXT: vle32.v v4, (a2) +; NOREMAT-NEXT: slli a2, t1, 11 ; NOREMAT-NEXT: sd a2, 456(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: li t2, 19 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v24 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v26, (a2) -; NOREMAT-NEXT: vle32.v v28, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v12 -; NOREMAT-NEXT: addiw a2, a5, -512 +; NOREMAT-NEXT: vle32.v v24, (a2) +; NOREMAT-NEXT: vle32.v v2, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v30 +; NOREMAT-NEXT: addiw a2, s11, -1536 ; NOREMAT-NEXT: sd a2, 448(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v12, (a2) -; NOREMAT-NEXT: vle32.v v6, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v24 -; NOREMAT-NEXT: addiw a2, a5, 512 +; NOREMAT-NEXT: vle32.v v22, (a2) +; NOREMAT-NEXT: vle32.v v30, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v28 +; NOREMAT-NEXT: slli a2, s1, 10 ; NOREMAT-NEXT: sd a2, 440(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v24, (a2) -; NOREMAT-NEXT: vle32.v v30, (a2) -; NOREMAT-NEXT: slli a2, s7, 10 +; NOREMAT-NEXT: vle32.v v28, (a2) +; NOREMAT-NEXT: vle32.v v6, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v26 +; NOREMAT-NEXT: addiw a2, s11, -512 ; NOREMAT-NEXT: sd a2, 432(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v8 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v8, (a2) -; NOREMAT-NEXT: vle32.v v4, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v22, v26 -; NOREMAT-NEXT: addiw a2, a5, 1536 +; NOREMAT-NEXT: vle32.v v26, (a2) +; NOREMAT-NEXT: vle32.v v0, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v24 +; NOREMAT-NEXT: addiw a2, s11, 512 ; NOREMAT-NEXT: sd a2, 424(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v22, (a2) -; NOREMAT-NEXT: vle32.v v26, (a2) -; NOREMAT-NEXT: slli a2, a6, 11 +; NOREMAT-NEXT: vle32.v v24, (a2) +; NOREMAT-NEXT: vle32.v v4, (a2) +; NOREMAT-NEXT: slli a2, s7, 10 ; NOREMAT-NEXT: sd a2, 416(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sf.vc.vv 3, 0, v28, v12 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v22 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v12, (a2) -; NOREMAT-NEXT: vle32.v v28, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v18 -; NOREMAT-NEXT: lui a6, 6 -; NOREMAT-NEXT: addiw a2, a6, -1536 +; NOREMAT-NEXT: vle32.v v22, (a2) +; NOREMAT-NEXT: vle32.v v2, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v28 +; NOREMAT-NEXT: addiw a2, s11, 1536 ; NOREMAT-NEXT: sd a2, 408(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v18, (a2) -; NOREMAT-NEXT: vle32.v v6, (a2) -; NOREMAT-NEXT: slli a2, s3, 10 +; NOREMAT-NEXT: vle32.v v28, (a2) +; NOREMAT-NEXT: vle32.v v30, (a2) +; NOREMAT-NEXT: slli a2, a4, 11 ; NOREMAT-NEXT: sd a2, 400(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sf.vc.vv 3, 0, v16, v24 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v26 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v16, (a2) -; NOREMAT-NEXT: vle32.v v24, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v8 -; NOREMAT-NEXT: addiw a2, a6, -512 +; NOREMAT-NEXT: vle32.v v26, (a2) +; 
NOREMAT-NEXT: vle32.v v6, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v20 +; NOREMAT-NEXT: lui a4, 6 +; NOREMAT-NEXT: addiw a2, a4, -1536 ; NOREMAT-NEXT: sd a2, 392(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v8, (a2) -; NOREMAT-NEXT: vle32.v v30, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v22 -; NOREMAT-NEXT: addiw a2, a6, 512 +; NOREMAT-NEXT: vle32.v v20, (a2) +; NOREMAT-NEXT: vle32.v v0, (a2) +; NOREMAT-NEXT: slli a2, s2, 10 ; NOREMAT-NEXT: sd a2, 384(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sf.vc.vv 3, 0, v18, v24 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v22, (a2) -; NOREMAT-NEXT: vle32.v v4, (a2) -; NOREMAT-NEXT: slli a2, s0, 10 +; NOREMAT-NEXT: vle32.v v18, (a2) +; NOREMAT-NEXT: vle32.v v24, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v22 +; NOREMAT-NEXT: addiw a2, a4, -512 ; NOREMAT-NEXT: sd a2, 376(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sf.vc.vv 3, 0, v26, v12 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v26, (a2) -; NOREMAT-NEXT: vle32.v v2, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v28, v18 -; NOREMAT-NEXT: addiw a2, a6, 1536 +; NOREMAT-NEXT: vle32.v v22, (a2) +; NOREMAT-NEXT: vle32.v v4, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v28 +; NOREMAT-NEXT: addiw a2, a4, 512 ; NOREMAT-NEXT: sd a2, 368(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v18, (a2) ; NOREMAT-NEXT: vle32.v v28, (a2) -; NOREMAT-NEXT: slli a2, t5, 11 +; NOREMAT-NEXT: vle32.v v2, (a2) +; NOREMAT-NEXT: slli a2, t6, 10 ; NOREMAT-NEXT: sd a2, 360(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v16 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v26 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v16, (a2) -; NOREMAT-NEXT: vle32.v v6, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v8 -; NOREMAT-NEXT: lui s0, 7 -; NOREMAT-NEXT: addiw a2, s0, -1536 +; NOREMAT-NEXT: vle32.v v26, (a2) +; NOREMAT-NEXT: vle32.v v30, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v20 +; NOREMAT-NEXT: addiw a2, a4, 1536 ; NOREMAT-NEXT: sd a2, 352(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v8, (a2) -; NOREMAT-NEXT: vle32.v v24, (a2) -; NOREMAT-NEXT: slli a2, t3, 10 +; NOREMAT-NEXT: vle32.v v20, (a2) +; NOREMAT-NEXT: vle32.v v6, (a2) +; NOREMAT-NEXT: slli a2, a6, 11 ; NOREMAT-NEXT: sd a2, 344(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v14 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v18 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v14, (a2) -; NOREMAT-NEXT: vle32.v v30, (a2) -; NOREMAT-NEXT: addi a0, sp, 640 -; NOREMAT-NEXT: vl2r.v v12, (a0) # Unknown-size Folded Reload -; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v22 -; NOREMAT-NEXT: addiw a2, s0, -512 +; NOREMAT-NEXT: vle32.v v18, (a2) +; NOREMAT-NEXT: vle32.v v0, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v22 +; NOREMAT-NEXT: lui a6, 7 +; NOREMAT-NEXT: addiw a2, a6, -1536 ; NOREMAT-NEXT: sd a2, 336(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 ; NOREMAT-NEXT: vle32.v v22, (a2) -; NOREMAT-NEXT: vle32.v v12, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v26 -; NOREMAT-NEXT: addiw a2, s0, 512 +; NOREMAT-NEXT: vle32.v v24, (a2) +; NOREMAT-NEXT: slli a2, t2, 10 ; NOREMAT-NEXT: sd a2, 328(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: lui t3, 7 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v16 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v26, (a2) +; NOREMAT-NEXT: vle32.v v16, (a2) ; NOREMAT-NEXT: vle32.v v4, (a2) -; NOREMAT-NEXT: slli a2, t0, 10 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v28 +; NOREMAT-NEXT: addiw a2, a6, -512 ; NOREMAT-NEXT: sd a2, 320(sp) # 
8-byte Folded Spill -; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v18 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v18, (a2) -; NOREMAT-NEXT: vle32.v v2, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v28, v16 -; NOREMAT-NEXT: addiw a2, t3, 1536 +; NOREMAT-NEXT: vle32.v v28, (a2) +; NOREMAT-NEXT: vle32.v v14, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v26 +; NOREMAT-NEXT: addiw a2, a6, 512 ; NOREMAT-NEXT: sd a2, 312(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v16, (a2) -; NOREMAT-NEXT: vle32.v v28, (a2) -; NOREMAT-NEXT: slli a2, a3, 11 +; NOREMAT-NEXT: vle32.v v26, (a2) +; NOREMAT-NEXT: vle32.v v2, (a2) +; NOREMAT-NEXT: slli a2, a3, 10 ; NOREMAT-NEXT: sd a2, 304(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v8 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v20 ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v8, (a2) -; NOREMAT-NEXT: vle32.v v6, (a2) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v14 -; NOREMAT-NEXT: addiw a2, t4, -1536 +; NOREMAT-NEXT: vle32.v v20, (a2) +; NOREMAT-NEXT: vle32.v v30, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v18 +; NOREMAT-NEXT: addiw a2, a6, 1536 ; NOREMAT-NEXT: sd a2, 296(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a7, a2 -; NOREMAT-NEXT: vle32.v v14, (a2) -; NOREMAT-NEXT: vle32.v v24, (a2) -; NOREMAT-NEXT: slli a2, t1, 10 +; NOREMAT-NEXT: vle32.v v18, (a2) +; NOREMAT-NEXT: vle32.v v6, (a2) +; NOREMAT-NEXT: slli a2, t0, 11 ; NOREMAT-NEXT: sd a2, 288(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v22 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v22 ; NOREMAT-NEXT: add a2, a7, a2 ; NOREMAT-NEXT: vle32.v v22, (a2) -; NOREMAT-NEXT: vle32.v v30, (a2) -; NOREMAT-NEXT: addiw a0, t4, -512 -; NOREMAT-NEXT: sd a0, 280(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: vle32.v v0, (a2) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v16 +; NOREMAT-NEXT: addiw a2, s5, -1536 +; NOREMAT-NEXT: sd a2, 280(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a2, a7, a2 +; NOREMAT-NEXT: vle32.v v16, (a2) +; NOREMAT-NEXT: vle32.v v24, (a2) +; NOREMAT-NEXT: slli a2, a5, 10 +; NOREMAT-NEXT: sd a2, 272(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v28 +; NOREMAT-NEXT: add a2, a7, a2 +; NOREMAT-NEXT: vle32.v v28, (a2) +; NOREMAT-NEXT: vle32.v v4, (a2) +; NOREMAT-NEXT: addiw a0, s5, -512 +; NOREMAT-NEXT: sd a0, 264(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a0, a7, a0 -; NOREMAT-NEXT: sf.vc.vv 3, 0, v12, v0 -; NOREMAT-NEXT: vle32.v v12, (a0) -; NOREMAT-NEXT: vle32.v v0, (a0) -; NOREMAT-NEXT: sf.vc.vv 3, 0, v10, v26 -; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v18 -; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v16 -; NOREMAT-NEXT: sf.vc.vv 3, 0, v28, v8 -; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v14 -; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v22 -; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v12 -; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v20 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v10 +; NOREMAT-NEXT: vle32.v v10, (a0) +; NOREMAT-NEXT: vle32.v v14, (a0) +; NOREMAT-NEXT: sf.vc.vv 3, 0, v8, v26 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v2, v20 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v30, v18 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v6, v22 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v0, v16 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v24, v28 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v4, v10 +; NOREMAT-NEXT: sf.vc.vv 3, 0, v14, v12 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: addi a0, a1, 1024 ; NOREMAT-NEXT: vse32.v v8, (a0) -; NOREMAT-NEXT: add s11, a1, s11 -; NOREMAT-NEXT: sd s11, 272(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: lui a0, 1 +; NOREMAT-NEXT: add a0, a1, a0 +; NOREMAT-NEXT: sd a0, 256(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: lui a0, 
2 ; NOREMAT-NEXT: add a0, a1, a0 -; NOREMAT-NEXT: sd a0, 264(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd a0, 248(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: lui a0, 3 ; NOREMAT-NEXT: add a0, a1, a0 -; NOREMAT-NEXT: sd a0, 256(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd a0, 240(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add s3, a1, s3 +; NOREMAT-NEXT: sd s3, 232(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add s11, a1, s11 +; NOREMAT-NEXT: sd s11, 224(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a4, a1, a4 -; NOREMAT-NEXT: sd a4, 248(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: add a5, a1, a5 -; NOREMAT-NEXT: sd a5, 240(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd a4, 216(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a6, a1, a6 -; NOREMAT-NEXT: sd a6, 232(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: add t3, a1, t3 -; NOREMAT-NEXT: sd t3, 224(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: add a0, a1, t4 -; NOREMAT-NEXT: sd a0, 216(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: addiw a0, t4, 512 -; NOREMAT-NEXT: sd a0, 192(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: addiw a0, t4, 1024 +; NOREMAT-NEXT: sd a6, 208(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: add a0, a1, s5 +; NOREMAT-NEXT: sd a0, 200(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: addiw a0, s5, 512 ; NOREMAT-NEXT: sd a0, 176(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: addiw a0, t4, 1536 +; NOREMAT-NEXT: addiw a0, s5, 1024 ; NOREMAT-NEXT: sd a0, 160(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: slli s1, s1, 11 -; NOREMAT-NEXT: sd s1, 128(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: addiw a0, s5, 1536 +; NOREMAT-NEXT: sd a0, 144(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: slli t4, t4, 11 +; NOREMAT-NEXT: sd t4, 112(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: lui a0, 9 ; NOREMAT-NEXT: addiw a2, a0, -1536 -; NOREMAT-NEXT: sd a2, 88(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: addiw a2, a0, -1024 ; NOREMAT-NEXT: sd a2, 72(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: addiw a2, a0, -1024 +; NOREMAT-NEXT: sd a2, 56(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: addiw a2, a0, -512 -; NOREMAT-NEXT: sd a2, 40(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd a2, 24(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add a2, a1, a0 -; NOREMAT-NEXT: sd a2, 208(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd a2, 192(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: addiw s11, a0, 512 ; NOREMAT-NEXT: addiw s7, a0, 1024 ; NOREMAT-NEXT: addiw s3, a0, 1536 -; NOREMAT-NEXT: slli s1, t2, 11 +; NOREMAT-NEXT: slli s1, s1, 11 ; NOREMAT-NEXT: lui a0, 10 ; NOREMAT-NEXT: addiw t2, a0, -1536 ; NOREMAT-NEXT: addiw a7, a0, -1024 ; NOREMAT-NEXT: addiw a4, a0, -512 ; NOREMAT-NEXT: add a2, a1, a0 -; NOREMAT-NEXT: sd a2, 200(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd a2, 184(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: addiw a0, a0, 512 -; NOREMAT-NEXT: ld a2, 512(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: add a2, a1, a2 -; NOREMAT-NEXT: ld a3, 504(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: add a3, a1, a3 -; NOREMAT-NEXT: add a5, a1, t6 -; NOREMAT-NEXT: add a6, a1, s2 -; NOREMAT-NEXT: add t0, a1, s4 -; NOREMAT-NEXT: add t1, a1, s5 -; NOREMAT-NEXT: add t3, a1, s6 -; NOREMAT-NEXT: add t4, a1, s8 -; NOREMAT-NEXT: add t5, a1, s9 -; NOREMAT-NEXT: add t6, a1, s10 -; NOREMAT-NEXT: add s0, a1, ra -; NOREMAT-NEXT: ld s2, 624(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add a2, a1, t3 +; NOREMAT-NEXT: add a3, a1, s0 +; NOREMAT-NEXT: add a5, a1, s4 +; NOREMAT-NEXT: add a6, a1, s6 +; NOREMAT-NEXT: add t0, a1, s8 +; NOREMAT-NEXT: add t1, a1, s9 +; NOREMAT-NEXT: add t3, a1, s10 +; NOREMAT-NEXT: add t4, a1, ra +; NOREMAT-NEXT: add 
t5, a1, t5 +; NOREMAT-NEXT: ld t6, 608(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add t6, a1, t6 +; NOREMAT-NEXT: ld s0, 600(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add s0, a1, s0 +; NOREMAT-NEXT: ld s2, 592(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add s2, a1, s2 -; NOREMAT-NEXT: ld s4, 616(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s4, 584(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add s4, a1, s4 -; NOREMAT-NEXT: ld s5, 608(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s5, 576(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add s5, a1, s5 -; NOREMAT-NEXT: ld s6, 600(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s6, 568(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add s6, a1, s6 -; NOREMAT-NEXT: ld s8, 592(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s8, 560(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add s8, a1, s8 -; NOREMAT-NEXT: ld s9, 584(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s9, 552(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add s9, a1, s9 -; NOREMAT-NEXT: ld s10, 576(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s10, 544(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add s10, a1, s10 -; NOREMAT-NEXT: ld ra, 568(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 16(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: ld ra, 560(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 24(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: ld ra, 552(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 32(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: ld ra, 544(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 48(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 536(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 56(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 0(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 528(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 64(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 8(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 520(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 80(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 16(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: ld ra, 512(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add ra, a1, ra +; NOREMAT-NEXT: sd ra, 32(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: ld ra, 504(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add ra, a1, ra +; NOREMAT-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 496(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 96(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 48(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 488(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 104(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 64(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 480(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 112(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 80(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 472(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 120(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 88(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 464(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 136(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 96(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 456(sp) # 8-byte Folded Reload ; 
NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 144(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 104(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 448(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 152(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 120(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 440(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 168(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 128(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 432(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 184(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 424(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 424(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 152(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 416(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 432(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 168(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 408(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 440(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 408(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 400(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 448(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 416(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 392(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 456(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 424(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 384(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 464(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 432(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 376(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 472(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 440(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 368(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 480(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 448(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 360(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 488(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 456(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 352(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 496(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 464(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 344(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 504(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 472(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 336(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 512(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 480(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 328(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 520(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 488(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 320(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 528(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 496(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 312(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 536(sp) # 8-byte Folded 
Spill +; NOREMAT-NEXT: sd ra, 504(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 304(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 544(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 512(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 296(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 552(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 520(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 288(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 560(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: sd ra, 528(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: ld ra, 280(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra +; NOREMAT-NEXT: sd ra, 536(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: ld ra, 272(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add ra, a1, ra +; NOREMAT-NEXT: sd ra, 544(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: ld ra, 264(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add ra, a1, ra +; NOREMAT-NEXT: sd ra, 552(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: ld ra, 176(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add ra, a1, ra +; NOREMAT-NEXT: sd ra, 560(sp) # 8-byte Folded Spill +; NOREMAT-NEXT: ld ra, 160(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: add ra, a1, ra ; NOREMAT-NEXT: sd ra, 568(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: ld ra, 192(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld ra, 144(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra ; NOREMAT-NEXT: sd ra, 576(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: ld ra, 176(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld ra, 112(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra ; NOREMAT-NEXT: sd ra, 584(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: ld ra, 160(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld ra, 72(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra ; NOREMAT-NEXT: sd ra, 592(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: ld ra, 128(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra ; NOREMAT-NEXT: sd ra, 600(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: ld ra, 88(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: add ra, a1, ra ; NOREMAT-NEXT: sd ra, 608(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: ld ra, 72(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 616(sp) # 8-byte Folded Spill -; NOREMAT-NEXT: ld ra, 40(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: add ra, a1, ra -; NOREMAT-NEXT: sd ra, 624(sp) # 8-byte Folded Spill ; NOREMAT-NEXT: add ra, a1, s11 ; NOREMAT-NEXT: add s11, a1, s7 ; NOREMAT-NEXT: add s7, a1, s3 @@ -657,7 +646,7 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: vse32.v v8, (a6) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 272(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 256(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: vse32.v v8, (t0) @@ -674,7 +663,7 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: vse32.v v8, (s0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 264(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 248(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: vse32.v v8, (s2) @@ -691,31 +680,37 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; NOREMAT-NEXT: sf.vc.v.i 2, 0, 
v8, 0 ; NOREMAT-NEXT: vse32.v v8, (s10) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 256(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 240(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 16(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 0(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: ld a0, 8(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 24(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: ld a0, 32(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 48(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 40(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 56(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 48(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: ld a0, 64(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 +; NOREMAT-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: vse32.v v8, (a0) +; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: ld a0, 80(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 248(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 88(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: ld a0, 96(sp) # 8-byte Folded Reload @@ -724,28 +719,28 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; NOREMAT-NEXT: ld a0, 104(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 112(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 120(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 120(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 128(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: ld a0, 136(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 144(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 224(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: ld a0, 152(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 240(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 168(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 168(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 408(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 184(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 416(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: ld a0, 424(sp) # 8-byte Folded Reload @@ -757,13 +752,13 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; NOREMAT-NEXT: ld a0, 440(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: 
vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 448(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 216(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 456(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 448(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 232(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 456(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: ld a0, 464(sp) # 8-byte Folded Reload @@ -781,13 +776,13 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; NOREMAT-NEXT: ld a0, 496(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 504(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 208(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 512(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 504(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 224(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 512(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: ld a0, 520(sp) # 8-byte Folded Reload @@ -805,13 +800,13 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; NOREMAT-NEXT: ld a0, 552(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 560(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 200(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 568(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 560(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 216(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 568(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: ld a0, 576(sp) # 8-byte Folded Reload @@ -829,13 +824,7 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; NOREMAT-NEXT: ld a0, 608(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 616(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: vse32.v v8, (a0) -; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 624(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: vse32.v v8, (a0) -; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 208(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 192(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: vse32.v v8, (ra) @@ -852,29 +841,25 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: vse32.v v8, (a7) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: ld a0, 200(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld a0, 184(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: vse32.v v8, (a0) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: vse32.v v8, (a4) ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; NOREMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 -; NOREMAT-NEXT: csrr a0, vlenb -; NOREMAT-NEXT: slli a0, a0, 1 -; NOREMAT-NEXT: add sp, sp, a0 -; NOREMAT-NEXT: .cfi_def_cfa sp, 752 -; NOREMAT-NEXT: ld ra, 744(sp) # 8-byte Folded Reload -; 
NOREMAT-NEXT: ld s0, 736(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: ld s1, 728(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: ld s2, 720(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: ld s3, 712(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: ld s4, 704(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: ld s5, 696(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: ld s6, 688(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: ld s7, 680(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: ld s8, 672(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: ld s9, 664(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: ld s10, 656(sp) # 8-byte Folded Reload -; NOREMAT-NEXT: ld s11, 648(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld ra, 712(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s0, 704(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s1, 696(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s2, 688(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s3, 680(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s4, 672(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s5, 664(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s6, 656(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s7, 648(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s8, 640(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s9, 632(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s10, 624(sp) # 8-byte Folded Reload +; NOREMAT-NEXT: ld s11, 616(sp) # 8-byte Folded Reload ; NOREMAT-NEXT: .cfi_restore ra ; NOREMAT-NEXT: .cfi_restore s0 ; NOREMAT-NEXT: .cfi_restore s1 @@ -888,7 +873,7 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; NOREMAT-NEXT: .cfi_restore s9 ; NOREMAT-NEXT: .cfi_restore s10 ; NOREMAT-NEXT: .cfi_restore s11 -; NOREMAT-NEXT: addi sp, sp, 752 +; NOREMAT-NEXT: addi sp, sp, 720 ; NOREMAT-NEXT: .cfi_def_cfa_offset 0 ; NOREMAT-NEXT: ret ; @@ -923,10 +908,10 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: .cfi_offset s10, -96 ; REMAT-NEXT: .cfi_offset s11, -104 ; REMAT-NEXT: csrr a2, vlenb -; REMAT-NEXT: li a3, 18 +; REMAT-NEXT: li a3, 14 ; REMAT-NEXT: mul a2, a2, a3 ; REMAT-NEXT: sub sp, sp, a2 -; REMAT-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xa0, 0x04, 0x22, 0x11, 0x12, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 544 + 18 * vlenb +; REMAT-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0xa0, 0x04, 0x22, 0x11, 0x0e, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 544 + 14 * vlenb ; REMAT-NEXT: li a4, 32 ; REMAT-NEXT: addi a5, a0, 512 ; REMAT-NEXT: addi a3, a0, 1024 @@ -976,51 +961,32 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: vle32.v v8, (a5) ; REMAT-NEXT: li a4, 13 ; REMAT-NEXT: slli a4, a4, 10 -; REMAT-NEXT: vle32.v v10, (a3) ; REMAT-NEXT: vle32.v v12, (a3) +; REMAT-NEXT: vle32.v v14, (a3) ; REMAT-NEXT: li a3, 27 ; REMAT-NEXT: slli a3, a3, 9 -; REMAT-NEXT: vle32.v v14, (a2) ; REMAT-NEXT: vle32.v v16, (a2) -; REMAT-NEXT: add a2, a0, a6 ; REMAT-NEXT: vle32.v v18, (a2) +; REMAT-NEXT: add a2, a0, a6 ; REMAT-NEXT: vle32.v v20, (a2) -; REMAT-NEXT: add a2, a0, a7 ; REMAT-NEXT: vle32.v v22, (a2) +; REMAT-NEXT: add a2, a0, a7 ; REMAT-NEXT: vle32.v v24, (a2) -; REMAT-NEXT: add a2, a0, t0 ; REMAT-NEXT: vle32.v v26, (a2) +; REMAT-NEXT: add a2, a0, t0 ; REMAT-NEXT: vle32.v v28, (a2) -; REMAT-NEXT: add a2, a0, t1 ; REMAT-NEXT: vle32.v v30, (a2) +; REMAT-NEXT: add a2, a0, t1 ; REMAT-NEXT: vle32.v v6, (a2) -; REMAT-NEXT: csrr a2, vlenb -; REMAT-NEXT: slli a2, a2, 4 -; REMAT-NEXT: add a2, sp, a2 -; REMAT-NEXT: addi a2, a2, 432 -; REMAT-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill +; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: add a2, a0, t2 -; REMAT-NEXT: vle32.v v4, (a0) ; 
REMAT-NEXT: vle32.v v2, (a2) -; REMAT-NEXT: vle32.v v6, (a2) -; REMAT-NEXT: csrr a2, vlenb -; REMAT-NEXT: li a5, 14 -; REMAT-NEXT: mul a2, a2, a5 -; REMAT-NEXT: add a2, sp, a2 -; REMAT-NEXT: addi a2, a2, 432 -; REMAT-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill -; REMAT-NEXT: add a2, a0, t3 -; REMAT-NEXT: sf.vc.vv 3, 0, v4, v8 -; REMAT-NEXT: vle32.v v4, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v8, v10 -; REMAT-NEXT: vle32.v v6, (a2) -; REMAT-NEXT: add a2, a0, t4 ; REMAT-NEXT: vle32.v v10, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v12, v14 +; REMAT-NEXT: add a2, a0, t3 +; REMAT-NEXT: vle32.v v0, (a0) +; REMAT-NEXT: sf.vc.vv 3, 0, v0, v8 ; REMAT-NEXT: vle32.v v0, (a2) -; REMAT-NEXT: add a2, a0, t5 -; REMAT-NEXT: vle32.v v14, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v16, v18 +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v12 ; REMAT-NEXT: vle32.v v8, (a2) ; REMAT-NEXT: csrr a2, vlenb ; REMAT-NEXT: li a5, 12 @@ -1028,117 +994,112 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: add a2, sp, a2 ; REMAT-NEXT: addi a2, a2, 432 ; REMAT-NEXT: vs2r.v v8, (a2) # Unknown-size Folded Spill -; REMAT-NEXT: add a2, a0, t6 +; REMAT-NEXT: add a2, a0, t4 +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v14, v16 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: add a2, a0, t5 +; REMAT-NEXT: vle32.v v16, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v18, v20 ; REMAT-NEXT: vle32.v v18, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v20, v22 +; REMAT-NEXT: add a2, a0, t6 ; REMAT-NEXT: vle32.v v20, (a2) -; REMAT-NEXT: add a2, a0, s0 +; REMAT-NEXT: sf.vc.vv 3, 0, v22, v24 ; REMAT-NEXT: vle32.v v22, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v24, v26 +; REMAT-NEXT: add a2, a0, s0 ; REMAT-NEXT: vle32.v v24, (a2) -; REMAT-NEXT: add a2, a0, s1 +; REMAT-NEXT: sf.vc.vv 3, 0, v26, v28 ; REMAT-NEXT: vle32.v v26, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v28, v30 +; REMAT-NEXT: add a2, a0, s1 ; REMAT-NEXT: vle32.v v28, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v30, v6 +; REMAT-NEXT: vle32.v v6, (a2) ; REMAT-NEXT: add a2, a0, s2 ; REMAT-NEXT: vle32.v v8, (a2) -; REMAT-NEXT: csrr a5, vlenb -; REMAT-NEXT: slli a5, a5, 4 -; REMAT-NEXT: add a5, sp, a5 -; REMAT-NEXT: addi a5, a5, 432 -; REMAT-NEXT: vl2r.v v12, (a5) # Unknown-size Folded Reload -; REMAT-NEXT: sf.vc.vv 3, 0, v12, v2 -; REMAT-NEXT: vle32.v v2, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v4, v2 +; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: add a2, a0, s3 -; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: vle32.v v2, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v0 +; REMAT-NEXT: vle32.v v0, (a2) +; REMAT-NEXT: add a2, a0, s4 +; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: csrr a5, vlenb -; REMAT-NEXT: li a6, 14 +; REMAT-NEXT: li a6, 12 ; REMAT-NEXT: mul a5, a5, a6 ; REMAT-NEXT: add a5, sp, a5 ; REMAT-NEXT: addi a5, a5, 432 -; REMAT-NEXT: vl2r.v v16, (a5) # Unknown-size Folded Reload -; REMAT-NEXT: sf.vc.vv 3, 0, v16, v4 +; REMAT-NEXT: vl2r.v v30, (a5) # Unknown-size Folded Reload +; REMAT-NEXT: sf.vc.vv 3, 0, v30, v12 ; REMAT-NEXT: vle32.v v30, (a2) -; REMAT-NEXT: add a2, a0, s4 -; REMAT-NEXT: vle32.v v16, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v6, v10 -; REMAT-NEXT: vle32.v v6, (a2) ; REMAT-NEXT: add a2, a0, s5 -; REMAT-NEXT: vle32.v v10, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v0, v14 -; REMAT-NEXT: vle32.v v4, (a2) +; REMAT-NEXT: vle32.v v12, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v14, v16 +; REMAT-NEXT: vle32.v v14, (a2) +; REMAT-NEXT: csrr a2, vlenb +; REMAT-NEXT: li a5, 12 +; REMAT-NEXT: mul a2, a2, a5 +; REMAT-NEXT: add a2, sp, a2 +; REMAT-NEXT: addi a2, a2, 432 +; REMAT-NEXT: vs2r.v v14, (a2) # Unknown-size Folded Spill ; REMAT-NEXT: add 
a2, a0, s6 ; REMAT-NEXT: vle32.v v14, (a2) -; REMAT-NEXT: csrr a5, vlenb -; REMAT-NEXT: li a6, 12 -; REMAT-NEXT: mul a5, a5, a6 -; REMAT-NEXT: add a5, sp, a5 -; REMAT-NEXT: addi a5, a5, 432 -; REMAT-NEXT: vl2r.v v0, (a5) # Unknown-size Folded Reload -; REMAT-NEXT: sf.vc.vv 3, 0, v0, v18 -; REMAT-NEXT: vle32.v v0, (a2) -; REMAT-NEXT: add a2, a0, s7 +; REMAT-NEXT: sf.vc.vv 3, 0, v18, v20 ; REMAT-NEXT: vle32.v v18, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v20, v22 +; REMAT-NEXT: add a2, a0, s7 +; REMAT-NEXT: vle32.v v16, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v22, v24 ; REMAT-NEXT: vle32.v v22, (a2) ; REMAT-NEXT: add a2, a0, s8 ; REMAT-NEXT: vle32.v v20, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v24, v26 +; REMAT-NEXT: sf.vc.vv 3, 0, v26, v28 ; REMAT-NEXT: vle32.v v26, (a2) ; REMAT-NEXT: add a2, a0, s9 ; REMAT-NEXT: vle32.v v24, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v28, v8 -; REMAT-NEXT: vle32.v v28, (a2) -; REMAT-NEXT: add a2, a0, s10 +; REMAT-NEXT: sf.vc.vv 3, 0, v6, v8 ; REMAT-NEXT: vle32.v v8, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v2, v12 -; REMAT-NEXT: vle32.v v12, (a2) -; REMAT-NEXT: csrr a2, vlenb -; REMAT-NEXT: slli a2, a2, 3 -; REMAT-NEXT: add a2, sp, a2 -; REMAT-NEXT: addi a2, a2, 432 -; REMAT-NEXT: vs2r.v v12, (a2) # Unknown-size Folded Spill +; REMAT-NEXT: add a2, a0, s10 +; REMAT-NEXT: vle32.v v6, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v4, v2 +; REMAT-NEXT: vle32.v v28, (a2) ; REMAT-NEXT: add a2, a0, s11 -; REMAT-NEXT: vle32.v v12, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v30, v16 -; REMAT-NEXT: vle32.v v16, (a2) +; REMAT-NEXT: vle32.v v4, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v0, v10 +; REMAT-NEXT: vle32.v v10, (a2) ; REMAT-NEXT: add a2, a0, ra ; REMAT-NEXT: vle32.v v2, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v6, v10 -; REMAT-NEXT: vle32.v v10, (a2) -; REMAT-NEXT: csrr a2, vlenb -; REMAT-NEXT: slli a2, a2, 1 -; REMAT-NEXT: add a2, sp, a2 -; REMAT-NEXT: addi a2, a2, 432 -; REMAT-NEXT: vs2r.v v10, (a2) # Unknown-size Folded Spill +; REMAT-NEXT: sf.vc.vv 3, 0, v30, v12 +; REMAT-NEXT: vle32.v v12, (a2) ; REMAT-NEXT: add a2, a0, a4 -; REMAT-NEXT: vle32.v v10, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v4, v14 +; REMAT-NEXT: vle32.v v0, (a2) +; REMAT-NEXT: csrr a4, vlenb +; REMAT-NEXT: li a5, 12 +; REMAT-NEXT: mul a4, a4, a5 +; REMAT-NEXT: add a4, sp, a4 +; REMAT-NEXT: addi a4, a4, 432 +; REMAT-NEXT: vl2r.v v30, (a4) # Unknown-size Folded Reload +; REMAT-NEXT: sf.vc.vv 3, 0, v30, v14 ; REMAT-NEXT: vle32.v v14, (a2) -; REMAT-NEXT: csrr a2, vlenb -; REMAT-NEXT: slli a2, a2, 2 -; REMAT-NEXT: add a2, sp, a2 -; REMAT-NEXT: addi a2, a2, 432 +; REMAT-NEXT: addi a2, sp, 432 ; REMAT-NEXT: vs2r.v v14, (a2) # Unknown-size Folded Spill ; REMAT-NEXT: add a2, a0, a3 ; REMAT-NEXT: vle32.v v14, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v0, v18 -; REMAT-NEXT: vle32.v v18, (a2) +; REMAT-NEXT: sf.vc.vv 3, 0, v18, v16 +; REMAT-NEXT: vle32.v v16, (a2) ; REMAT-NEXT: csrr a2, vlenb -; REMAT-NEXT: slli a2, a2, 4 +; REMAT-NEXT: li a3, 12 +; REMAT-NEXT: mul a2, a2, a3 ; REMAT-NEXT: add a2, sp, a2 ; REMAT-NEXT: addi a2, a2, 432 -; REMAT-NEXT: vs2r.v v18, (a2) # Unknown-size Folded Spill +; REMAT-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill ; REMAT-NEXT: li a5, 7 ; REMAT-NEXT: slli a5, a5, 11 ; REMAT-NEXT: add a2, a0, a5 -; REMAT-NEXT: vle32.v v18, (a2) -; REMAT-NEXT: addi a3, sp, 432 -; REMAT-NEXT: vs2r.v v18, (a3) # Unknown-size Folded Spill +; REMAT-NEXT: vle32.v v16, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v22, v20 ; REMAT-NEXT: vle32.v v18, (a2) ; REMAT-NEXT: csrr a2, vlenb -; REMAT-NEXT: li a3, 14 +; REMAT-NEXT: li a3, 10 ; REMAT-NEXT: mul a2, a2, a3 ; 
REMAT-NEXT: add a2, sp, a2 ; REMAT-NEXT: addi a2, a2, 432 @@ -1150,8 +1111,7 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: sf.vc.vv 3, 0, v26, v24 ; REMAT-NEXT: vle32.v v20, (a2) ; REMAT-NEXT: csrr a2, vlenb -; REMAT-NEXT: li a3, 12 -; REMAT-NEXT: mul a2, a2, a3 +; REMAT-NEXT: slli a2, a2, 3 ; REMAT-NEXT: add a2, sp, a2 ; REMAT-NEXT: addi a2, a2, 432 ; REMAT-NEXT: vs2r.v v20, (a2) # Unknown-size Folded Spill @@ -1159,10 +1119,10 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: slli a2, a2, 10 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v30, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v28, v8 +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v6 ; REMAT-NEXT: vle32.v v8, (a2) ; REMAT-NEXT: csrr a2, vlenb -; REMAT-NEXT: li a3, 10 +; REMAT-NEXT: li a3, 6 ; REMAT-NEXT: mul a2, a2, a3 ; REMAT-NEXT: add a2, sp, a2 ; REMAT-NEXT: addi a2, a2, 432 @@ -1171,26 +1131,20 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: slli a2, a2, 9 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v6, (a2) -; REMAT-NEXT: csrr a3, vlenb -; REMAT-NEXT: slli a3, a3, 3 -; REMAT-NEXT: add a3, sp, a3 -; REMAT-NEXT: addi a3, a3, 432 -; REMAT-NEXT: vl2r.v v8, (a3) # Unknown-size Folded Reload -; REMAT-NEXT: sf.vc.vv 3, 0, v8, v12 +; REMAT-NEXT: sf.vc.vv 3, 0, v28, v4 ; REMAT-NEXT: vle32.v v8, (a2) ; REMAT-NEXT: csrr a2, vlenb -; REMAT-NEXT: slli a2, a2, 3 +; REMAT-NEXT: slli a2, a2, 2 ; REMAT-NEXT: add a2, sp, a2 ; REMAT-NEXT: addi a2, a2, 432 ; REMAT-NEXT: vs2r.v v8, (a2) # Unknown-size Folded Spill ; REMAT-NEXT: lui a2, 4 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v4, (a2) -; REMAT-NEXT: sf.vc.vv 3, 0, v16, v2 +; REMAT-NEXT: sf.vc.vv 3, 0, v10, v2 ; REMAT-NEXT: vle32.v v8, (a2) ; REMAT-NEXT: csrr a2, vlenb -; REMAT-NEXT: li a3, 6 -; REMAT-NEXT: mul a2, a2, a3 +; REMAT-NEXT: slli a2, a2, 1 ; REMAT-NEXT: add a2, sp, a2 ; REMAT-NEXT: addi a2, a2, 432 ; REMAT-NEXT: vs2r.v v8, (a2) # Unknown-size Folded Spill @@ -1198,21 +1152,13 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: addiw a2, a2, 512 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v2, (a2) -; REMAT-NEXT: csrr a3, vlenb -; REMAT-NEXT: slli a3, a3, 1 -; REMAT-NEXT: add a3, sp, a3 -; REMAT-NEXT: addi a3, a3, 432 -; REMAT-NEXT: vl2r.v v8, (a3) # Unknown-size Folded Reload -; REMAT-NEXT: sf.vc.vv 3, 0, v8, v10 +; REMAT-NEXT: sf.vc.vv 3, 0, v12, v0 ; REMAT-NEXT: vle32.v v20, (a2) ; REMAT-NEXT: li a2, 17 ; REMAT-NEXT: slli a2, a2, 10 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v0, (a2) -; REMAT-NEXT: csrr a3, vlenb -; REMAT-NEXT: slli a3, a3, 2 -; REMAT-NEXT: add a3, sp, a3 -; REMAT-NEXT: addi a3, a3, 432 +; REMAT-NEXT: addi a3, sp, 432 ; REMAT-NEXT: vl2r.v v8, (a3) # Unknown-size Folded Reload ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v14 ; REMAT-NEXT: vle32.v v22, (a2) @@ -1221,20 +1167,19 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v24, (a2) ; REMAT-NEXT: csrr a3, vlenb -; REMAT-NEXT: slli a3, a3, 4 +; REMAT-NEXT: li a4, 12 +; REMAT-NEXT: mul a3, a3, a4 ; REMAT-NEXT: add a3, sp, a3 ; REMAT-NEXT: addi a3, a3, 432 ; REMAT-NEXT: vl2r.v v8, (a3) # Unknown-size Folded Reload -; REMAT-NEXT: addi a3, sp, 432 -; REMAT-NEXT: vl2r.v v10, (a3) # Unknown-size Folded Reload -; REMAT-NEXT: sf.vc.vv 3, 0, v8, v10 +; REMAT-NEXT: sf.vc.vv 3, 0, v8, v16 ; REMAT-NEXT: vle32.v v8, (a2) ; REMAT-NEXT: li a2, 9 ; REMAT-NEXT: slli a2, a2, 11 ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v26, (a2) ; REMAT-NEXT: csrr a3, vlenb -; REMAT-NEXT: li a4, 14 +; REMAT-NEXT: li a4, 10 ; REMAT-NEXT: mul 
a3, a3, a4 ; REMAT-NEXT: add a3, sp, a3 ; REMAT-NEXT: addi a3, a3, 432 @@ -1246,8 +1191,7 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v28, (a2) ; REMAT-NEXT: csrr a3, vlenb -; REMAT-NEXT: li a4, 12 -; REMAT-NEXT: mul a3, a3, a4 +; REMAT-NEXT: slli a3, a3, 3 ; REMAT-NEXT: add a3, sp, a3 ; REMAT-NEXT: addi a3, a3, 432 ; REMAT-NEXT: vl2r.v v12, (a3) # Unknown-size Folded Reload @@ -1258,7 +1202,7 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v30, (a2) ; REMAT-NEXT: csrr a3, vlenb -; REMAT-NEXT: li a4, 10 +; REMAT-NEXT: li a4, 6 ; REMAT-NEXT: mul a3, a3, a4 ; REMAT-NEXT: add a3, sp, a3 ; REMAT-NEXT: addi a3, a3, 432 @@ -1270,7 +1214,7 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v6, (a2) ; REMAT-NEXT: csrr a3, vlenb -; REMAT-NEXT: slli a3, a3, 3 +; REMAT-NEXT: slli a3, a3, 2 ; REMAT-NEXT: add a3, sp, a3 ; REMAT-NEXT: addi a3, a3, 432 ; REMAT-NEXT: vl2r.v v16, (a3) # Unknown-size Folded Reload @@ -1280,8 +1224,7 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v4, (a2) ; REMAT-NEXT: csrr a3, vlenb -; REMAT-NEXT: li a4, 6 -; REMAT-NEXT: mul a3, a3, a4 +; REMAT-NEXT: slli a3, a3, 1 ; REMAT-NEXT: add a3, sp, a3 ; REMAT-NEXT: addi a3, a3, 432 ; REMAT-NEXT: vl2r.v v18, (a3) # Unknown-size Folded Reload @@ -1293,15 +1236,15 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: vle32.v v2, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v20, v0 ; REMAT-NEXT: vle32.v v20, (a2) -; REMAT-NEXT: li s7, 21 -; REMAT-NEXT: slli s7, s7, 10 -; REMAT-NEXT: add a2, a0, s7 +; REMAT-NEXT: li a2, 21 +; REMAT-NEXT: slli a2, a2, 10 +; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v0, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v22, v24 ; REMAT-NEXT: vle32.v v22, (a2) -; REMAT-NEXT: lui s4, 5 -; REMAT-NEXT: addiw s4, s4, 1536 -; REMAT-NEXT: add a2, a0, s4 +; REMAT-NEXT: lui a2, 5 +; REMAT-NEXT: addiw a2, a2, 1536 +; REMAT-NEXT: add a2, a0, a2 ; REMAT-NEXT: vle32.v v24, (a2) ; REMAT-NEXT: sf.vc.vv 3, 0, v8, v26 ; REMAT-NEXT: vle32.v v8, (a2) @@ -1489,18 +1432,14 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: lui a0, 2 ; REMAT-NEXT: add a0, a1, a0 ; REMAT-NEXT: sd a0, 320(sp) # 8-byte Folded Spill -; REMAT-NEXT: li a0, 17 -; REMAT-NEXT: slli a0, a0, 9 -; REMAT-NEXT: add a0, a1, a0 -; REMAT-NEXT: sd a0, 312(sp) # 8-byte Folded Spill +; REMAT-NEXT: add s4, a1, s4 +; REMAT-NEXT: sd s4, 312(sp) # 8-byte Folded Spill ; REMAT-NEXT: add s5, a1, s5 ; REMAT-NEXT: sd s5, 304(sp) # 8-byte Folded Spill ; REMAT-NEXT: add s6, a1, s6 ; REMAT-NEXT: sd s6, 296(sp) # 8-byte Folded Spill -; REMAT-NEXT: li a0, 5 -; REMAT-NEXT: slli a0, a0, 11 -; REMAT-NEXT: add a0, a1, a0 -; REMAT-NEXT: sd a0, 288(sp) # 8-byte Folded Spill +; REMAT-NEXT: add s7, a1, s7 +; REMAT-NEXT: sd s7, 288(sp) # 8-byte Folded Spill ; REMAT-NEXT: add s8, a1, s8 ; REMAT-NEXT: sd s8, 280(sp) # 8-byte Folded Spill ; REMAT-NEXT: add s9, a1, s9 @@ -1571,10 +1510,14 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: addiw a0, a0, 512 ; REMAT-NEXT: add a0, a1, a0 ; REMAT-NEXT: sd a0, 120(sp) # 8-byte Folded Spill -; REMAT-NEXT: add s7, a1, s7 -; REMAT-NEXT: sd s7, 112(sp) # 8-byte Folded Spill -; REMAT-NEXT: add s4, a1, s4 -; REMAT-NEXT: sd s4, 104(sp) # 8-byte Folded Spill +; REMAT-NEXT: li a0, 21 +; REMAT-NEXT: slli a0, a0, 10 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sd a0, 112(sp) # 8-byte Folded Spill +; REMAT-NEXT: lui a0, 5 +; REMAT-NEXT: addiw a0, a0, 
1536 +; REMAT-NEXT: add a0, a1, a0 +; REMAT-NEXT: sd a0, 104(sp) # 8-byte Folded Spill ; REMAT-NEXT: li a0, 11 ; REMAT-NEXT: slli a0, a0, 11 ; REMAT-NEXT: add a0, a1, a0 @@ -1879,7 +1822,7 @@ define void @test(ptr %0, ptr %1, i64 %2) { ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; REMAT-NEXT: sf.vc.v.i 2, 0, v8, 0 ; REMAT-NEXT: csrr a0, vlenb -; REMAT-NEXT: li a1, 18 +; REMAT-NEXT: li a1, 14 ; REMAT-NEXT: mul a0, a0, a1 ; REMAT-NEXT: add sp, sp, a0 ; REMAT-NEXT: .cfi_def_cfa sp, 544 diff --git a/llvm/test/CodeGen/RISCV/push-pop-popret.ll b/llvm/test/CodeGen/RISCV/push-pop-popret.ll index 1fbdaa76dfb68..5ce5849af700c 100644 --- a/llvm/test/CodeGen/RISCV/push-pop-popret.ll +++ b/llvm/test/CodeGen/RISCV/push-pop-popret.ll @@ -1174,16 +1174,16 @@ define i32 @varargs(ptr %fmt, ...) { ; RV64IZCMP: # %bb.0: ; RV64IZCMP-NEXT: addi sp, sp, -80 ; RV64IZCMP-NEXT: .cfi_def_cfa_offset 80 -; RV64IZCMP-NEXT: sd a1, 24(sp) -; RV64IZCMP-NEXT: addi a0, sp, 28 -; RV64IZCMP-NEXT: sd a0, 8(sp) -; RV64IZCMP-NEXT: lw a0, 24(sp) ; RV64IZCMP-NEXT: sd a5, 56(sp) ; RV64IZCMP-NEXT: sd a6, 64(sp) ; RV64IZCMP-NEXT: sd a7, 72(sp) +; RV64IZCMP-NEXT: sd a1, 24(sp) ; RV64IZCMP-NEXT: sd a2, 32(sp) ; RV64IZCMP-NEXT: sd a3, 40(sp) ; RV64IZCMP-NEXT: sd a4, 48(sp) +; RV64IZCMP-NEXT: addi a0, sp, 28 +; RV64IZCMP-NEXT: sd a0, 8(sp) +; RV64IZCMP-NEXT: lw a0, 24(sp) ; RV64IZCMP-NEXT: addi sp, sp, 80 ; RV64IZCMP-NEXT: .cfi_def_cfa_offset 0 ; RV64IZCMP-NEXT: ret @@ -1210,16 +1210,16 @@ define i32 @varargs(ptr %fmt, ...) { ; RV64IZCMP-SR: # %bb.0: ; RV64IZCMP-SR-NEXT: addi sp, sp, -80 ; RV64IZCMP-SR-NEXT: .cfi_def_cfa_offset 80 -; RV64IZCMP-SR-NEXT: sd a1, 24(sp) -; RV64IZCMP-SR-NEXT: addi a0, sp, 28 -; RV64IZCMP-SR-NEXT: sd a0, 8(sp) -; RV64IZCMP-SR-NEXT: lw a0, 24(sp) ; RV64IZCMP-SR-NEXT: sd a5, 56(sp) ; RV64IZCMP-SR-NEXT: sd a6, 64(sp) ; RV64IZCMP-SR-NEXT: sd a7, 72(sp) +; RV64IZCMP-SR-NEXT: sd a1, 24(sp) ; RV64IZCMP-SR-NEXT: sd a2, 32(sp) ; RV64IZCMP-SR-NEXT: sd a3, 40(sp) ; RV64IZCMP-SR-NEXT: sd a4, 48(sp) +; RV64IZCMP-SR-NEXT: addi a0, sp, 28 +; RV64IZCMP-SR-NEXT: sd a0, 8(sp) +; RV64IZCMP-SR-NEXT: lw a0, 24(sp) ; RV64IZCMP-SR-NEXT: addi sp, sp, 80 ; RV64IZCMP-SR-NEXT: .cfi_def_cfa_offset 0 ; RV64IZCMP-SR-NEXT: ret @@ -1246,16 +1246,16 @@ define i32 @varargs(ptr %fmt, ...) 
{ ; RV64I: # %bb.0: ; RV64I-NEXT: addi sp, sp, -80 ; RV64I-NEXT: .cfi_def_cfa_offset 80 -; RV64I-NEXT: sd a1, 24(sp) -; RV64I-NEXT: addi a0, sp, 28 -; RV64I-NEXT: sd a0, 8(sp) -; RV64I-NEXT: lw a0, 24(sp) ; RV64I-NEXT: sd a5, 56(sp) ; RV64I-NEXT: sd a6, 64(sp) ; RV64I-NEXT: sd a7, 72(sp) +; RV64I-NEXT: sd a1, 24(sp) ; RV64I-NEXT: sd a2, 32(sp) ; RV64I-NEXT: sd a3, 40(sp) ; RV64I-NEXT: sd a4, 48(sp) +; RV64I-NEXT: addi a0, sp, 28 +; RV64I-NEXT: sd a0, 8(sp) +; RV64I-NEXT: lw a0, 24(sp) ; RV64I-NEXT: addi sp, sp, 80 ; RV64I-NEXT: .cfi_def_cfa_offset 0 ; RV64I-NEXT: ret @@ -1291,26 +1291,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) { ; RV32IZCMP-NEXT: lw t3, 20(a5) ; RV32IZCMP-NEXT: lw t4, 24(a5) ; RV32IZCMP-NEXT: lw t5, 28(a5) -; RV32IZCMP-NEXT: lw t6, 48(a5) -; RV32IZCMP-NEXT: lw s2, 52(a5) -; RV32IZCMP-NEXT: lw a3, 56(a5) -; RV32IZCMP-NEXT: lw a4, 60(a5) -; RV32IZCMP-NEXT: lw a1, 64(a5) -; RV32IZCMP-NEXT: lw s0, 68(a5) -; RV32IZCMP-NEXT: lw s3, 32(a5) -; RV32IZCMP-NEXT: lw s4, 36(a5) -; RV32IZCMP-NEXT: lw s1, 40(a5) -; RV32IZCMP-NEXT: lw a2, 44(a5) -; RV32IZCMP-NEXT: sw s0, 68(a5) -; RV32IZCMP-NEXT: sw a1, 64(a5) -; RV32IZCMP-NEXT: sw a4, 60(a5) -; RV32IZCMP-NEXT: sw a3, 56(a5) -; RV32IZCMP-NEXT: sw s2, 52(a5) -; RV32IZCMP-NEXT: sw t6, 48(a5) -; RV32IZCMP-NEXT: sw a2, 44(a5) -; RV32IZCMP-NEXT: sw s1, 40(a5) -; RV32IZCMP-NEXT: sw s4, 36(a5) -; RV32IZCMP-NEXT: sw s3, 32(a5) +; RV32IZCMP-NEXT: lw t6, 32(a5) +; RV32IZCMP-NEXT: lw s2, 36(a5) +; RV32IZCMP-NEXT: lw s3, 40(a5) +; RV32IZCMP-NEXT: lw s4, 44(a5) +; RV32IZCMP-NEXT: lw a1, 48(a5) +; RV32IZCMP-NEXT: lw s0, 52(a5) +; RV32IZCMP-NEXT: lw s1, 56(a5) +; RV32IZCMP-NEXT: lw a2, 60(a5) +; RV32IZCMP-NEXT: lw a3, 64(a5) +; RV32IZCMP-NEXT: lw a4, 68(a5) +; RV32IZCMP-NEXT: sw a4, 68(a5) +; RV32IZCMP-NEXT: sw a3, 64(a5) +; RV32IZCMP-NEXT: sw a2, 60(a5) +; RV32IZCMP-NEXT: sw s1, 56(a5) +; RV32IZCMP-NEXT: sw s0, 52(a5) +; RV32IZCMP-NEXT: sw a1, 48(a5) +; RV32IZCMP-NEXT: sw s4, 44(a5) +; RV32IZCMP-NEXT: sw s3, 40(a5) +; RV32IZCMP-NEXT: sw s2, 36(a5) +; RV32IZCMP-NEXT: sw t6, 32(a5) ; RV32IZCMP-NEXT: sw t5, 28(a5) ; RV32IZCMP-NEXT: sw t4, 24(a5) ; RV32IZCMP-NEXT: sw t3, 20(a5) @@ -1340,26 +1340,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) { ; RV64IZCMP-NEXT: lw t3, 20(a5) ; RV64IZCMP-NEXT: lw t4, 24(a5) ; RV64IZCMP-NEXT: lw t5, 28(a5) -; RV64IZCMP-NEXT: lw t6, 48(a5) -; RV64IZCMP-NEXT: lw s2, 52(a5) -; RV64IZCMP-NEXT: lw a3, 56(a5) -; RV64IZCMP-NEXT: lw a4, 60(a5) -; RV64IZCMP-NEXT: lw a1, 64(a5) -; RV64IZCMP-NEXT: lw s0, 68(a5) -; RV64IZCMP-NEXT: lw s3, 32(a5) -; RV64IZCMP-NEXT: lw s4, 36(a5) -; RV64IZCMP-NEXT: lw s1, 40(a5) -; RV64IZCMP-NEXT: lw a2, 44(a5) -; RV64IZCMP-NEXT: sw s0, 68(a5) -; RV64IZCMP-NEXT: sw a1, 64(a5) -; RV64IZCMP-NEXT: sw a4, 60(a5) -; RV64IZCMP-NEXT: sw a3, 56(a5) -; RV64IZCMP-NEXT: sw s2, 52(a5) -; RV64IZCMP-NEXT: sw t6, 48(a5) -; RV64IZCMP-NEXT: sw a2, 44(a5) -; RV64IZCMP-NEXT: sw s1, 40(a5) -; RV64IZCMP-NEXT: sw s4, 36(a5) -; RV64IZCMP-NEXT: sw s3, 32(a5) +; RV64IZCMP-NEXT: lw t6, 32(a5) +; RV64IZCMP-NEXT: lw s2, 36(a5) +; RV64IZCMP-NEXT: lw s3, 40(a5) +; RV64IZCMP-NEXT: lw s4, 44(a5) +; RV64IZCMP-NEXT: lw a1, 48(a5) +; RV64IZCMP-NEXT: lw s0, 52(a5) +; RV64IZCMP-NEXT: lw s1, 56(a5) +; RV64IZCMP-NEXT: lw a2, 60(a5) +; RV64IZCMP-NEXT: lw a3, 64(a5) +; RV64IZCMP-NEXT: lw a4, 68(a5) +; RV64IZCMP-NEXT: sw a4, 68(a5) +; RV64IZCMP-NEXT: sw a3, 64(a5) +; RV64IZCMP-NEXT: sw a2, 60(a5) +; RV64IZCMP-NEXT: sw s1, 56(a5) +; RV64IZCMP-NEXT: sw s0, 52(a5) +; RV64IZCMP-NEXT: sw a1, 
48(a5) +; RV64IZCMP-NEXT: sw s4, 44(a5) +; RV64IZCMP-NEXT: sw s3, 40(a5) +; RV64IZCMP-NEXT: sw s2, 36(a5) +; RV64IZCMP-NEXT: sw t6, 32(a5) ; RV64IZCMP-NEXT: sw t5, 28(a5) ; RV64IZCMP-NEXT: sw t4, 24(a5) ; RV64IZCMP-NEXT: sw t3, 20(a5) @@ -1389,26 +1389,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) { ; RV32IZCMP-SR-NEXT: lw t3, 20(a5) ; RV32IZCMP-SR-NEXT: lw t4, 24(a5) ; RV32IZCMP-SR-NEXT: lw t5, 28(a5) -; RV32IZCMP-SR-NEXT: lw t6, 48(a5) -; RV32IZCMP-SR-NEXT: lw s2, 52(a5) -; RV32IZCMP-SR-NEXT: lw a3, 56(a5) -; RV32IZCMP-SR-NEXT: lw a4, 60(a5) -; RV32IZCMP-SR-NEXT: lw a1, 64(a5) -; RV32IZCMP-SR-NEXT: lw s0, 68(a5) -; RV32IZCMP-SR-NEXT: lw s3, 32(a5) -; RV32IZCMP-SR-NEXT: lw s4, 36(a5) -; RV32IZCMP-SR-NEXT: lw s1, 40(a5) -; RV32IZCMP-SR-NEXT: lw a2, 44(a5) -; RV32IZCMP-SR-NEXT: sw s0, 68(a5) -; RV32IZCMP-SR-NEXT: sw a1, 64(a5) -; RV32IZCMP-SR-NEXT: sw a4, 60(a5) -; RV32IZCMP-SR-NEXT: sw a3, 56(a5) -; RV32IZCMP-SR-NEXT: sw s2, 52(a5) -; RV32IZCMP-SR-NEXT: sw t6, 48(a5) -; RV32IZCMP-SR-NEXT: sw a2, 44(a5) -; RV32IZCMP-SR-NEXT: sw s1, 40(a5) -; RV32IZCMP-SR-NEXT: sw s4, 36(a5) -; RV32IZCMP-SR-NEXT: sw s3, 32(a5) +; RV32IZCMP-SR-NEXT: lw t6, 32(a5) +; RV32IZCMP-SR-NEXT: lw s2, 36(a5) +; RV32IZCMP-SR-NEXT: lw s3, 40(a5) +; RV32IZCMP-SR-NEXT: lw s4, 44(a5) +; RV32IZCMP-SR-NEXT: lw a1, 48(a5) +; RV32IZCMP-SR-NEXT: lw s0, 52(a5) +; RV32IZCMP-SR-NEXT: lw s1, 56(a5) +; RV32IZCMP-SR-NEXT: lw a2, 60(a5) +; RV32IZCMP-SR-NEXT: lw a3, 64(a5) +; RV32IZCMP-SR-NEXT: lw a4, 68(a5) +; RV32IZCMP-SR-NEXT: sw a4, 68(a5) +; RV32IZCMP-SR-NEXT: sw a3, 64(a5) +; RV32IZCMP-SR-NEXT: sw a2, 60(a5) +; RV32IZCMP-SR-NEXT: sw s1, 56(a5) +; RV32IZCMP-SR-NEXT: sw s0, 52(a5) +; RV32IZCMP-SR-NEXT: sw a1, 48(a5) +; RV32IZCMP-SR-NEXT: sw s4, 44(a5) +; RV32IZCMP-SR-NEXT: sw s3, 40(a5) +; RV32IZCMP-SR-NEXT: sw s2, 36(a5) +; RV32IZCMP-SR-NEXT: sw t6, 32(a5) ; RV32IZCMP-SR-NEXT: sw t5, 28(a5) ; RV32IZCMP-SR-NEXT: sw t4, 24(a5) ; RV32IZCMP-SR-NEXT: sw t3, 20(a5) @@ -1438,26 +1438,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) { ; RV64IZCMP-SR-NEXT: lw t3, 20(a5) ; RV64IZCMP-SR-NEXT: lw t4, 24(a5) ; RV64IZCMP-SR-NEXT: lw t5, 28(a5) -; RV64IZCMP-SR-NEXT: lw t6, 48(a5) -; RV64IZCMP-SR-NEXT: lw s2, 52(a5) -; RV64IZCMP-SR-NEXT: lw a3, 56(a5) -; RV64IZCMP-SR-NEXT: lw a4, 60(a5) -; RV64IZCMP-SR-NEXT: lw a1, 64(a5) -; RV64IZCMP-SR-NEXT: lw s0, 68(a5) -; RV64IZCMP-SR-NEXT: lw s3, 32(a5) -; RV64IZCMP-SR-NEXT: lw s4, 36(a5) -; RV64IZCMP-SR-NEXT: lw s1, 40(a5) -; RV64IZCMP-SR-NEXT: lw a2, 44(a5) -; RV64IZCMP-SR-NEXT: sw s0, 68(a5) -; RV64IZCMP-SR-NEXT: sw a1, 64(a5) -; RV64IZCMP-SR-NEXT: sw a4, 60(a5) -; RV64IZCMP-SR-NEXT: sw a3, 56(a5) -; RV64IZCMP-SR-NEXT: sw s2, 52(a5) -; RV64IZCMP-SR-NEXT: sw t6, 48(a5) -; RV64IZCMP-SR-NEXT: sw a2, 44(a5) -; RV64IZCMP-SR-NEXT: sw s1, 40(a5) -; RV64IZCMP-SR-NEXT: sw s4, 36(a5) -; RV64IZCMP-SR-NEXT: sw s3, 32(a5) +; RV64IZCMP-SR-NEXT: lw t6, 32(a5) +; RV64IZCMP-SR-NEXT: lw s2, 36(a5) +; RV64IZCMP-SR-NEXT: lw s3, 40(a5) +; RV64IZCMP-SR-NEXT: lw s4, 44(a5) +; RV64IZCMP-SR-NEXT: lw a1, 48(a5) +; RV64IZCMP-SR-NEXT: lw s0, 52(a5) +; RV64IZCMP-SR-NEXT: lw s1, 56(a5) +; RV64IZCMP-SR-NEXT: lw a2, 60(a5) +; RV64IZCMP-SR-NEXT: lw a3, 64(a5) +; RV64IZCMP-SR-NEXT: lw a4, 68(a5) +; RV64IZCMP-SR-NEXT: sw a4, 68(a5) +; RV64IZCMP-SR-NEXT: sw a3, 64(a5) +; RV64IZCMP-SR-NEXT: sw a2, 60(a5) +; RV64IZCMP-SR-NEXT: sw s1, 56(a5) +; RV64IZCMP-SR-NEXT: sw s0, 52(a5) +; RV64IZCMP-SR-NEXT: sw a1, 48(a5) +; RV64IZCMP-SR-NEXT: sw s4, 44(a5) +; RV64IZCMP-SR-NEXT: sw s3, 40(a5) +; 
RV64IZCMP-SR-NEXT: sw s2, 36(a5) +; RV64IZCMP-SR-NEXT: sw t6, 32(a5) ; RV64IZCMP-SR-NEXT: sw t5, 28(a5) ; RV64IZCMP-SR-NEXT: sw t4, 24(a5) ; RV64IZCMP-SR-NEXT: sw t3, 20(a5) @@ -1492,26 +1492,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) { ; RV32I-NEXT: lw a7, 20(a5) ; RV32I-NEXT: lw t0, 24(a5) ; RV32I-NEXT: lw t1, 28(a5) -; RV32I-NEXT: lw t2, 48(a5) -; RV32I-NEXT: lw t3, 52(a5) -; RV32I-NEXT: lw t4, 56(a5) -; RV32I-NEXT: lw t5, 60(a5) -; RV32I-NEXT: lw t6, 64(a5) -; RV32I-NEXT: lw s0, 68(a5) -; RV32I-NEXT: lw s1, 32(a5) -; RV32I-NEXT: lw s2, 36(a5) -; RV32I-NEXT: lw s3, 40(a5) -; RV32I-NEXT: lw s4, 44(a5) -; RV32I-NEXT: sw s0, 68(a5) -; RV32I-NEXT: sw t6, 64(a5) -; RV32I-NEXT: sw t5, 60(a5) -; RV32I-NEXT: sw t4, 56(a5) -; RV32I-NEXT: sw t3, 52(a5) -; RV32I-NEXT: sw t2, 48(a5) -; RV32I-NEXT: sw s4, 44(a5) -; RV32I-NEXT: sw s3, 40(a5) -; RV32I-NEXT: sw s2, 36(a5) -; RV32I-NEXT: sw s1, 32(a5) +; RV32I-NEXT: lw t2, 32(a5) +; RV32I-NEXT: lw t3, 36(a5) +; RV32I-NEXT: lw t4, 40(a5) +; RV32I-NEXT: lw t5, 44(a5) +; RV32I-NEXT: lw t6, 48(a5) +; RV32I-NEXT: lw s0, 52(a5) +; RV32I-NEXT: lw s1, 56(a5) +; RV32I-NEXT: lw s2, 60(a5) +; RV32I-NEXT: lw s3, 64(a5) +; RV32I-NEXT: lw s4, 68(a5) +; RV32I-NEXT: sw s4, 68(a5) +; RV32I-NEXT: sw s3, 64(a5) +; RV32I-NEXT: sw s2, 60(a5) +; RV32I-NEXT: sw s1, 56(a5) +; RV32I-NEXT: sw s0, 52(a5) +; RV32I-NEXT: sw t6, 48(a5) +; RV32I-NEXT: sw t5, 44(a5) +; RV32I-NEXT: sw t4, 40(a5) +; RV32I-NEXT: sw t3, 36(a5) +; RV32I-NEXT: sw t2, 32(a5) ; RV32I-NEXT: sw t1, 28(a5) ; RV32I-NEXT: sw t0, 24(a5) ; RV32I-NEXT: sw a7, 20(a5) @@ -1558,26 +1558,26 @@ define void @many_args(i32, i32, i32, i32, i32, i32, i32, i32, i32) { ; RV64I-NEXT: lw a7, 20(a5) ; RV64I-NEXT: lw t0, 24(a5) ; RV64I-NEXT: lw t1, 28(a5) -; RV64I-NEXT: lw t2, 48(a5) -; RV64I-NEXT: lw t3, 52(a5) -; RV64I-NEXT: lw t4, 56(a5) -; RV64I-NEXT: lw t5, 60(a5) -; RV64I-NEXT: lw t6, 64(a5) -; RV64I-NEXT: lw s0, 68(a5) -; RV64I-NEXT: lw s1, 32(a5) -; RV64I-NEXT: lw s2, 36(a5) -; RV64I-NEXT: lw s3, 40(a5) -; RV64I-NEXT: lw s4, 44(a5) -; RV64I-NEXT: sw s0, 68(a5) -; RV64I-NEXT: sw t6, 64(a5) -; RV64I-NEXT: sw t5, 60(a5) -; RV64I-NEXT: sw t4, 56(a5) -; RV64I-NEXT: sw t3, 52(a5) -; RV64I-NEXT: sw t2, 48(a5) -; RV64I-NEXT: sw s4, 44(a5) -; RV64I-NEXT: sw s3, 40(a5) -; RV64I-NEXT: sw s2, 36(a5) -; RV64I-NEXT: sw s1, 32(a5) +; RV64I-NEXT: lw t2, 32(a5) +; RV64I-NEXT: lw t3, 36(a5) +; RV64I-NEXT: lw t4, 40(a5) +; RV64I-NEXT: lw t5, 44(a5) +; RV64I-NEXT: lw t6, 48(a5) +; RV64I-NEXT: lw s0, 52(a5) +; RV64I-NEXT: lw s1, 56(a5) +; RV64I-NEXT: lw s2, 60(a5) +; RV64I-NEXT: lw s3, 64(a5) +; RV64I-NEXT: lw s4, 68(a5) +; RV64I-NEXT: sw s4, 68(a5) +; RV64I-NEXT: sw s3, 64(a5) +; RV64I-NEXT: sw s2, 60(a5) +; RV64I-NEXT: sw s1, 56(a5) +; RV64I-NEXT: sw s0, 52(a5) +; RV64I-NEXT: sw t6, 48(a5) +; RV64I-NEXT: sw t5, 44(a5) +; RV64I-NEXT: sw t4, 40(a5) +; RV64I-NEXT: sw t3, 36(a5) +; RV64I-NEXT: sw t2, 32(a5) ; RV64I-NEXT: sw t1, 28(a5) ; RV64I-NEXT: sw t0, 24(a5) ; RV64I-NEXT: sw a7, 20(a5) @@ -2323,16 +2323,16 @@ define void @callee_with_irq() "interrupt"="user" { ; RV32IZCMP-NEXT: .cfi_offset t4, -104 ; RV32IZCMP-NEXT: .cfi_offset t5, -108 ; RV32IZCMP-NEXT: .cfi_offset t6, -112 -; RV32IZCMP-NEXT: lui t0, %hi(var_test_irq) -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(t0) +; RV32IZCMP-NEXT: lui a4, %hi(var_test_irq) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(a4) ; RV32IZCMP-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(t0) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a4) ; 
RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(t0) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a4) ; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(t0) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a4) ; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: addi a5, t0, %lo(var_test_irq) +; RV32IZCMP-NEXT: addi a5, a4, %lo(var_test_irq) ; RV32IZCMP-NEXT: lw a0, 16(a5) ; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill ; RV32IZCMP-NEXT: lw a0, 20(a5) @@ -2352,28 +2352,28 @@ define void @callee_with_irq() "interrupt"="user" { ; RV32IZCMP-NEXT: lw s11, 72(a5) ; RV32IZCMP-NEXT: lw ra, 76(a5) ; RV32IZCMP-NEXT: lw s1, 80(a5) -; RV32IZCMP-NEXT: lw t3, 84(a5) -; RV32IZCMP-NEXT: lw t2, 88(a5) -; RV32IZCMP-NEXT: lw t1, 92(a5) -; RV32IZCMP-NEXT: lw a7, 112(a5) -; RV32IZCMP-NEXT: lw s0, 116(a5) -; RV32IZCMP-NEXT: lw a3, 120(a5) -; RV32IZCMP-NEXT: lw a0, 124(a5) -; RV32IZCMP-NEXT: lw a6, 96(a5) -; RV32IZCMP-NEXT: lw a4, 100(a5) -; RV32IZCMP-NEXT: lw a2, 104(a5) -; RV32IZCMP-NEXT: lw a1, 108(a5) -; RV32IZCMP-NEXT: sw a0, 124(a5) -; RV32IZCMP-NEXT: sw a3, 120(a5) -; RV32IZCMP-NEXT: sw s0, 116(a5) -; RV32IZCMP-NEXT: sw a7, 112(a5) -; RV32IZCMP-NEXT: sw a1, 108(a5) -; RV32IZCMP-NEXT: sw a2, 104(a5) -; RV32IZCMP-NEXT: sw a4, 100(a5) -; RV32IZCMP-NEXT: sw a6, 96(a5) -; RV32IZCMP-NEXT: sw t1, 92(a5) -; RV32IZCMP-NEXT: sw t2, 88(a5) -; RV32IZCMP-NEXT: sw t3, 84(a5) +; RV32IZCMP-NEXT: lw t2, 84(a5) +; RV32IZCMP-NEXT: lw t1, 88(a5) +; RV32IZCMP-NEXT: lw t0, 92(a5) +; RV32IZCMP-NEXT: lw a7, 96(a5) +; RV32IZCMP-NEXT: lw s0, 100(a5) +; RV32IZCMP-NEXT: lw a6, 104(a5) +; RV32IZCMP-NEXT: lw a3, 108(a5) +; RV32IZCMP-NEXT: lw a2, 112(a5) +; RV32IZCMP-NEXT: lw a1, 116(a5) +; RV32IZCMP-NEXT: lw a0, 120(a5) +; RV32IZCMP-NEXT: lw t3, 124(a5) +; RV32IZCMP-NEXT: sw t3, 124(a5) +; RV32IZCMP-NEXT: sw a0, 120(a5) +; RV32IZCMP-NEXT: sw a1, 116(a5) +; RV32IZCMP-NEXT: sw a2, 112(a5) +; RV32IZCMP-NEXT: sw a3, 108(a5) +; RV32IZCMP-NEXT: sw a6, 104(a5) +; RV32IZCMP-NEXT: sw s0, 100(a5) +; RV32IZCMP-NEXT: sw a7, 96(a5) +; RV32IZCMP-NEXT: sw t0, 92(a5) +; RV32IZCMP-NEXT: sw t1, 88(a5) +; RV32IZCMP-NEXT: sw t2, 84(a5) ; RV32IZCMP-NEXT: sw s1, 80(a5) ; RV32IZCMP-NEXT: sw ra, 76(a5) ; RV32IZCMP-NEXT: sw s11, 72(a5) @@ -2394,13 +2394,13 @@ define void @callee_with_irq() "interrupt"="user" { ; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: sw a0, 16(a5) ; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(t0) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a4) ; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(t0) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a4) ; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(t0) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a4) ; RV32IZCMP-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(t0) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a4) ; RV32IZCMP-NEXT: lw t0, 88(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: lw t1, 84(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: lw t2, 80(sp) # 4-byte Folded Reload @@ -2499,16 +2499,16 @@ define void @callee_with_irq() "interrupt"="user" { ; RV64IZCMP-NEXT: .cfi_offset t4, -208 ; RV64IZCMP-NEXT: .cfi_offset t5, -216 ; RV64IZCMP-NEXT: .cfi_offset t6, -224 -; RV64IZCMP-NEXT: lui t0, %hi(var_test_irq) -; RV64IZCMP-NEXT: lw a0, 
%lo(var_test_irq)(t0) +; RV64IZCMP-NEXT: lui a4, %hi(var_test_irq) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(a4) ; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(t0) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a4) ; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(t0) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a4) ; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(t0) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a4) ; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: addi a5, t0, %lo(var_test_irq) +; RV64IZCMP-NEXT: addi a5, a4, %lo(var_test_irq) ; RV64IZCMP-NEXT: lw a0, 16(a5) ; RV64IZCMP-NEXT: sd a0, 8(sp) # 8-byte Folded Spill ; RV64IZCMP-NEXT: lw a0, 20(a5) @@ -2528,28 +2528,28 @@ define void @callee_with_irq() "interrupt"="user" { ; RV64IZCMP-NEXT: lw s11, 72(a5) ; RV64IZCMP-NEXT: lw ra, 76(a5) ; RV64IZCMP-NEXT: lw s1, 80(a5) -; RV64IZCMP-NEXT: lw t3, 84(a5) -; RV64IZCMP-NEXT: lw t2, 88(a5) -; RV64IZCMP-NEXT: lw t1, 92(a5) -; RV64IZCMP-NEXT: lw a7, 112(a5) -; RV64IZCMP-NEXT: lw s0, 116(a5) -; RV64IZCMP-NEXT: lw a3, 120(a5) -; RV64IZCMP-NEXT: lw a0, 124(a5) -; RV64IZCMP-NEXT: lw a6, 96(a5) -; RV64IZCMP-NEXT: lw a4, 100(a5) -; RV64IZCMP-NEXT: lw a2, 104(a5) -; RV64IZCMP-NEXT: lw a1, 108(a5) -; RV64IZCMP-NEXT: sw a0, 124(a5) -; RV64IZCMP-NEXT: sw a3, 120(a5) -; RV64IZCMP-NEXT: sw s0, 116(a5) -; RV64IZCMP-NEXT: sw a7, 112(a5) -; RV64IZCMP-NEXT: sw a1, 108(a5) -; RV64IZCMP-NEXT: sw a2, 104(a5) -; RV64IZCMP-NEXT: sw a4, 100(a5) -; RV64IZCMP-NEXT: sw a6, 96(a5) -; RV64IZCMP-NEXT: sw t1, 92(a5) -; RV64IZCMP-NEXT: sw t2, 88(a5) -; RV64IZCMP-NEXT: sw t3, 84(a5) +; RV64IZCMP-NEXT: lw t2, 84(a5) +; RV64IZCMP-NEXT: lw t1, 88(a5) +; RV64IZCMP-NEXT: lw t0, 92(a5) +; RV64IZCMP-NEXT: lw a7, 96(a5) +; RV64IZCMP-NEXT: lw s0, 100(a5) +; RV64IZCMP-NEXT: lw a6, 104(a5) +; RV64IZCMP-NEXT: lw a3, 108(a5) +; RV64IZCMP-NEXT: lw a2, 112(a5) +; RV64IZCMP-NEXT: lw a1, 116(a5) +; RV64IZCMP-NEXT: lw a0, 120(a5) +; RV64IZCMP-NEXT: lw t3, 124(a5) +; RV64IZCMP-NEXT: sw t3, 124(a5) +; RV64IZCMP-NEXT: sw a0, 120(a5) +; RV64IZCMP-NEXT: sw a1, 116(a5) +; RV64IZCMP-NEXT: sw a2, 112(a5) +; RV64IZCMP-NEXT: sw a3, 108(a5) +; RV64IZCMP-NEXT: sw a6, 104(a5) +; RV64IZCMP-NEXT: sw s0, 100(a5) +; RV64IZCMP-NEXT: sw a7, 96(a5) +; RV64IZCMP-NEXT: sw t0, 92(a5) +; RV64IZCMP-NEXT: sw t1, 88(a5) +; RV64IZCMP-NEXT: sw t2, 84(a5) ; RV64IZCMP-NEXT: sw s1, 80(a5) ; RV64IZCMP-NEXT: sw ra, 76(a5) ; RV64IZCMP-NEXT: sw s11, 72(a5) @@ -2570,13 +2570,13 @@ define void @callee_with_irq() "interrupt"="user" { ; RV64IZCMP-NEXT: ld a0, 8(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: sw a0, 16(a5) ; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(t0) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a4) ; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(t0) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a4) ; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(t0) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a4) ; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(t0) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a4) ; RV64IZCMP-NEXT: ld t0, 160(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: ld t1, 152(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: ld t2, 144(sp) # 8-byte Folded Reload @@ 
-2675,16 +2675,16 @@ define void @callee_with_irq() "interrupt"="user" { ; RV32IZCMP-SR-NEXT: .cfi_offset t4, -104 ; RV32IZCMP-SR-NEXT: .cfi_offset t5, -108 ; RV32IZCMP-SR-NEXT: .cfi_offset t6, -112 -; RV32IZCMP-SR-NEXT: lui t0, %hi(var_test_irq) -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(t0) +; RV32IZCMP-SR-NEXT: lui a4, %hi(var_test_irq) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(a4) ; RV32IZCMP-SR-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(t0) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(a4) ; RV32IZCMP-SR-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(t0) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(a4) ; RV32IZCMP-SR-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(t0) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(a4) ; RV32IZCMP-SR-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: addi a5, t0, %lo(var_test_irq) +; RV32IZCMP-SR-NEXT: addi a5, a4, %lo(var_test_irq) ; RV32IZCMP-SR-NEXT: lw a0, 16(a5) ; RV32IZCMP-SR-NEXT: sw a0, 12(sp) # 4-byte Folded Spill ; RV32IZCMP-SR-NEXT: lw a0, 20(a5) @@ -2704,28 +2704,28 @@ define void @callee_with_irq() "interrupt"="user" { ; RV32IZCMP-SR-NEXT: lw s11, 72(a5) ; RV32IZCMP-SR-NEXT: lw ra, 76(a5) ; RV32IZCMP-SR-NEXT: lw s1, 80(a5) -; RV32IZCMP-SR-NEXT: lw t3, 84(a5) -; RV32IZCMP-SR-NEXT: lw t2, 88(a5) -; RV32IZCMP-SR-NEXT: lw t1, 92(a5) -; RV32IZCMP-SR-NEXT: lw a7, 112(a5) -; RV32IZCMP-SR-NEXT: lw s0, 116(a5) -; RV32IZCMP-SR-NEXT: lw a3, 120(a5) -; RV32IZCMP-SR-NEXT: lw a0, 124(a5) -; RV32IZCMP-SR-NEXT: lw a6, 96(a5) -; RV32IZCMP-SR-NEXT: lw a4, 100(a5) -; RV32IZCMP-SR-NEXT: lw a2, 104(a5) -; RV32IZCMP-SR-NEXT: lw a1, 108(a5) -; RV32IZCMP-SR-NEXT: sw a0, 124(a5) -; RV32IZCMP-SR-NEXT: sw a3, 120(a5) -; RV32IZCMP-SR-NEXT: sw s0, 116(a5) -; RV32IZCMP-SR-NEXT: sw a7, 112(a5) -; RV32IZCMP-SR-NEXT: sw a1, 108(a5) -; RV32IZCMP-SR-NEXT: sw a2, 104(a5) -; RV32IZCMP-SR-NEXT: sw a4, 100(a5) -; RV32IZCMP-SR-NEXT: sw a6, 96(a5) -; RV32IZCMP-SR-NEXT: sw t1, 92(a5) -; RV32IZCMP-SR-NEXT: sw t2, 88(a5) -; RV32IZCMP-SR-NEXT: sw t3, 84(a5) +; RV32IZCMP-SR-NEXT: lw t2, 84(a5) +; RV32IZCMP-SR-NEXT: lw t1, 88(a5) +; RV32IZCMP-SR-NEXT: lw t0, 92(a5) +; RV32IZCMP-SR-NEXT: lw a7, 96(a5) +; RV32IZCMP-SR-NEXT: lw s0, 100(a5) +; RV32IZCMP-SR-NEXT: lw a6, 104(a5) +; RV32IZCMP-SR-NEXT: lw a3, 108(a5) +; RV32IZCMP-SR-NEXT: lw a2, 112(a5) +; RV32IZCMP-SR-NEXT: lw a1, 116(a5) +; RV32IZCMP-SR-NEXT: lw a0, 120(a5) +; RV32IZCMP-SR-NEXT: lw t3, 124(a5) +; RV32IZCMP-SR-NEXT: sw t3, 124(a5) +; RV32IZCMP-SR-NEXT: sw a0, 120(a5) +; RV32IZCMP-SR-NEXT: sw a1, 116(a5) +; RV32IZCMP-SR-NEXT: sw a2, 112(a5) +; RV32IZCMP-SR-NEXT: sw a3, 108(a5) +; RV32IZCMP-SR-NEXT: sw a6, 104(a5) +; RV32IZCMP-SR-NEXT: sw s0, 100(a5) +; RV32IZCMP-SR-NEXT: sw a7, 96(a5) +; RV32IZCMP-SR-NEXT: sw t0, 92(a5) +; RV32IZCMP-SR-NEXT: sw t1, 88(a5) +; RV32IZCMP-SR-NEXT: sw t2, 84(a5) ; RV32IZCMP-SR-NEXT: sw s1, 80(a5) ; RV32IZCMP-SR-NEXT: sw ra, 76(a5) ; RV32IZCMP-SR-NEXT: sw s11, 72(a5) @@ -2746,13 +2746,13 @@ define void @callee_with_irq() "interrupt"="user" { ; RV32IZCMP-SR-NEXT: lw a0, 12(sp) # 4-byte Folded Reload ; RV32IZCMP-SR-NEXT: sw a0, 16(a5) ; RV32IZCMP-SR-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(t0) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(a4) ; RV32IZCMP-SR-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(t0) +; RV32IZCMP-SR-NEXT: sw a0, 
%lo(var_test_irq+8)(a4) ; RV32IZCMP-SR-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(t0) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(a4) ; RV32IZCMP-SR-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(t0) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(a4) ; RV32IZCMP-SR-NEXT: lw t0, 88(sp) # 4-byte Folded Reload ; RV32IZCMP-SR-NEXT: lw t1, 84(sp) # 4-byte Folded Reload ; RV32IZCMP-SR-NEXT: lw t2, 80(sp) # 4-byte Folded Reload @@ -2851,16 +2851,16 @@ define void @callee_with_irq() "interrupt"="user" { ; RV64IZCMP-SR-NEXT: .cfi_offset t4, -208 ; RV64IZCMP-SR-NEXT: .cfi_offset t5, -216 ; RV64IZCMP-SR-NEXT: .cfi_offset t6, -224 -; RV64IZCMP-SR-NEXT: lui t0, %hi(var_test_irq) -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(t0) +; RV64IZCMP-SR-NEXT: lui a4, %hi(var_test_irq) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(a4) ; RV64IZCMP-SR-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(t0) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(a4) ; RV64IZCMP-SR-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(t0) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(a4) ; RV64IZCMP-SR-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(t0) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(a4) ; RV64IZCMP-SR-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: addi a5, t0, %lo(var_test_irq) +; RV64IZCMP-SR-NEXT: addi a5, a4, %lo(var_test_irq) ; RV64IZCMP-SR-NEXT: lw a0, 16(a5) ; RV64IZCMP-SR-NEXT: sd a0, 8(sp) # 8-byte Folded Spill ; RV64IZCMP-SR-NEXT: lw a0, 20(a5) @@ -2880,28 +2880,28 @@ define void @callee_with_irq() "interrupt"="user" { ; RV64IZCMP-SR-NEXT: lw s11, 72(a5) ; RV64IZCMP-SR-NEXT: lw ra, 76(a5) ; RV64IZCMP-SR-NEXT: lw s1, 80(a5) -; RV64IZCMP-SR-NEXT: lw t3, 84(a5) -; RV64IZCMP-SR-NEXT: lw t2, 88(a5) -; RV64IZCMP-SR-NEXT: lw t1, 92(a5) -; RV64IZCMP-SR-NEXT: lw a7, 112(a5) -; RV64IZCMP-SR-NEXT: lw s0, 116(a5) -; RV64IZCMP-SR-NEXT: lw a3, 120(a5) -; RV64IZCMP-SR-NEXT: lw a0, 124(a5) -; RV64IZCMP-SR-NEXT: lw a6, 96(a5) -; RV64IZCMP-SR-NEXT: lw a4, 100(a5) -; RV64IZCMP-SR-NEXT: lw a2, 104(a5) -; RV64IZCMP-SR-NEXT: lw a1, 108(a5) -; RV64IZCMP-SR-NEXT: sw a0, 124(a5) -; RV64IZCMP-SR-NEXT: sw a3, 120(a5) -; RV64IZCMP-SR-NEXT: sw s0, 116(a5) -; RV64IZCMP-SR-NEXT: sw a7, 112(a5) -; RV64IZCMP-SR-NEXT: sw a1, 108(a5) -; RV64IZCMP-SR-NEXT: sw a2, 104(a5) -; RV64IZCMP-SR-NEXT: sw a4, 100(a5) -; RV64IZCMP-SR-NEXT: sw a6, 96(a5) -; RV64IZCMP-SR-NEXT: sw t1, 92(a5) -; RV64IZCMP-SR-NEXT: sw t2, 88(a5) -; RV64IZCMP-SR-NEXT: sw t3, 84(a5) +; RV64IZCMP-SR-NEXT: lw t2, 84(a5) +; RV64IZCMP-SR-NEXT: lw t1, 88(a5) +; RV64IZCMP-SR-NEXT: lw t0, 92(a5) +; RV64IZCMP-SR-NEXT: lw a7, 96(a5) +; RV64IZCMP-SR-NEXT: lw s0, 100(a5) +; RV64IZCMP-SR-NEXT: lw a6, 104(a5) +; RV64IZCMP-SR-NEXT: lw a3, 108(a5) +; RV64IZCMP-SR-NEXT: lw a2, 112(a5) +; RV64IZCMP-SR-NEXT: lw a1, 116(a5) +; RV64IZCMP-SR-NEXT: lw a0, 120(a5) +; RV64IZCMP-SR-NEXT: lw t3, 124(a5) +; RV64IZCMP-SR-NEXT: sw t3, 124(a5) +; RV64IZCMP-SR-NEXT: sw a0, 120(a5) +; RV64IZCMP-SR-NEXT: sw a1, 116(a5) +; RV64IZCMP-SR-NEXT: sw a2, 112(a5) +; RV64IZCMP-SR-NEXT: sw a3, 108(a5) +; RV64IZCMP-SR-NEXT: sw a6, 104(a5) +; RV64IZCMP-SR-NEXT: sw s0, 100(a5) +; RV64IZCMP-SR-NEXT: sw a7, 96(a5) +; RV64IZCMP-SR-NEXT: sw t0, 92(a5) +; RV64IZCMP-SR-NEXT: sw t1, 88(a5) +; RV64IZCMP-SR-NEXT: sw t2, 84(a5) ; RV64IZCMP-SR-NEXT: sw s1, 80(a5) ; RV64IZCMP-SR-NEXT: sw 
ra, 76(a5) ; RV64IZCMP-SR-NEXT: sw s11, 72(a5) @@ -2922,13 +2922,13 @@ define void @callee_with_irq() "interrupt"="user" { ; RV64IZCMP-SR-NEXT: ld a0, 8(sp) # 8-byte Folded Reload ; RV64IZCMP-SR-NEXT: sw a0, 16(a5) ; RV64IZCMP-SR-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(t0) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(a4) ; RV64IZCMP-SR-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(t0) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(a4) ; RV64IZCMP-SR-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(t0) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(a4) ; RV64IZCMP-SR-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(t0) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(a4) ; RV64IZCMP-SR-NEXT: ld t0, 160(sp) # 8-byte Folded Reload ; RV64IZCMP-SR-NEXT: ld t1, 152(sp) # 8-byte Folded Reload ; RV64IZCMP-SR-NEXT: ld t2, 144(sp) # 8-byte Folded Reload @@ -3038,16 +3038,16 @@ define void @callee_with_irq() "interrupt"="user" { ; RV32I-NEXT: .cfi_offset t4, -104 ; RV32I-NEXT: .cfi_offset t5, -108 ; RV32I-NEXT: .cfi_offset t6, -112 -; RV32I-NEXT: lui a7, %hi(var_test_irq) -; RV32I-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV32I-NEXT: lui a4, %hi(var_test_irq) +; RV32I-NEXT: lw a0, %lo(var_test_irq)(a4) ; RV32I-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a4) ; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a4) ; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a4) ; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi a5, a7, %lo(var_test_irq) +; RV32I-NEXT: addi a5, a4, %lo(var_test_irq) ; RV32I-NEXT: lw a0, 16(a5) ; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a0, 20(a5) @@ -3070,22 +3070,22 @@ define void @callee_with_irq() "interrupt"="user" { ; RV32I-NEXT: lw s8, 84(a5) ; RV32I-NEXT: lw s9, 88(a5) ; RV32I-NEXT: lw s10, 92(a5) -; RV32I-NEXT: lw s11, 112(a5) -; RV32I-NEXT: lw ra, 116(a5) -; RV32I-NEXT: lw a3, 120(a5) -; RV32I-NEXT: lw a0, 124(a5) -; RV32I-NEXT: lw a6, 96(a5) -; RV32I-NEXT: lw a4, 100(a5) -; RV32I-NEXT: lw a2, 104(a5) -; RV32I-NEXT: lw a1, 108(a5) -; RV32I-NEXT: sw a0, 124(a5) -; RV32I-NEXT: sw a3, 120(a5) -; RV32I-NEXT: sw ra, 116(a5) -; RV32I-NEXT: sw s11, 112(a5) -; RV32I-NEXT: sw a1, 108(a5) -; RV32I-NEXT: sw a2, 104(a5) -; RV32I-NEXT: sw a4, 100(a5) -; RV32I-NEXT: sw a6, 96(a5) +; RV32I-NEXT: lw s11, 96(a5) +; RV32I-NEXT: lw ra, 100(a5) +; RV32I-NEXT: lw a6, 104(a5) +; RV32I-NEXT: lw a3, 108(a5) +; RV32I-NEXT: lw a2, 112(a5) +; RV32I-NEXT: lw a1, 116(a5) +; RV32I-NEXT: lw a0, 120(a5) +; RV32I-NEXT: lw a7, 124(a5) +; RV32I-NEXT: sw a7, 124(a5) +; RV32I-NEXT: sw a0, 120(a5) +; RV32I-NEXT: sw a1, 116(a5) +; RV32I-NEXT: sw a2, 112(a5) +; RV32I-NEXT: sw a3, 108(a5) +; RV32I-NEXT: sw a6, 104(a5) +; RV32I-NEXT: sw ra, 100(a5) +; RV32I-NEXT: sw s11, 96(a5) ; RV32I-NEXT: sw s10, 92(a5) ; RV32I-NEXT: sw s9, 88(a5) ; RV32I-NEXT: sw s8, 84(a5) @@ -3109,13 +3109,13 @@ define void @callee_with_irq() "interrupt"="user" { ; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: sw a0, 16(a5) ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV32I-NEXT: sw a0, 
%lo(var_test_irq+12)(a4) ; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a4) ; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a4) ; RV32I-NEXT: lw a0, 28(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV32I-NEXT: sw a0, %lo(var_test_irq)(a4) ; RV32I-NEXT: lw ra, 140(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw t0, 136(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw t1, 132(sp) # 4-byte Folded Reload @@ -3236,16 +3236,16 @@ define void @callee_with_irq() "interrupt"="user" { ; RV64I-NEXT: .cfi_offset t4, -208 ; RV64I-NEXT: .cfi_offset t5, -216 ; RV64I-NEXT: .cfi_offset t6, -224 -; RV64I-NEXT: lui a7, %hi(var_test_irq) -; RV64I-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV64I-NEXT: lui a4, %hi(var_test_irq) +; RV64I-NEXT: lw a0, %lo(var_test_irq)(a4) ; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a4) ; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a4) ; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a4) ; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi a5, a7, %lo(var_test_irq) +; RV64I-NEXT: addi a5, a4, %lo(var_test_irq) ; RV64I-NEXT: lw a0, 16(a5) ; RV64I-NEXT: sd a0, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, 20(a5) @@ -3268,22 +3268,22 @@ define void @callee_with_irq() "interrupt"="user" { ; RV64I-NEXT: lw s8, 84(a5) ; RV64I-NEXT: lw s9, 88(a5) ; RV64I-NEXT: lw s10, 92(a5) -; RV64I-NEXT: lw s11, 112(a5) -; RV64I-NEXT: lw ra, 116(a5) -; RV64I-NEXT: lw a3, 120(a5) -; RV64I-NEXT: lw a0, 124(a5) -; RV64I-NEXT: lw a6, 96(a5) -; RV64I-NEXT: lw a4, 100(a5) -; RV64I-NEXT: lw a2, 104(a5) -; RV64I-NEXT: lw a1, 108(a5) -; RV64I-NEXT: sw a0, 124(a5) -; RV64I-NEXT: sw a3, 120(a5) -; RV64I-NEXT: sw ra, 116(a5) -; RV64I-NEXT: sw s11, 112(a5) -; RV64I-NEXT: sw a1, 108(a5) -; RV64I-NEXT: sw a2, 104(a5) -; RV64I-NEXT: sw a4, 100(a5) -; RV64I-NEXT: sw a6, 96(a5) +; RV64I-NEXT: lw s11, 96(a5) +; RV64I-NEXT: lw ra, 100(a5) +; RV64I-NEXT: lw a6, 104(a5) +; RV64I-NEXT: lw a3, 108(a5) +; RV64I-NEXT: lw a2, 112(a5) +; RV64I-NEXT: lw a1, 116(a5) +; RV64I-NEXT: lw a0, 120(a5) +; RV64I-NEXT: lw a7, 124(a5) +; RV64I-NEXT: sw a7, 124(a5) +; RV64I-NEXT: sw a0, 120(a5) +; RV64I-NEXT: sw a1, 116(a5) +; RV64I-NEXT: sw a2, 112(a5) +; RV64I-NEXT: sw a3, 108(a5) +; RV64I-NEXT: sw a6, 104(a5) +; RV64I-NEXT: sw ra, 100(a5) +; RV64I-NEXT: sw s11, 96(a5) ; RV64I-NEXT: sw s10, 92(a5) ; RV64I-NEXT: sw s9, 88(a5) ; RV64I-NEXT: sw s8, 84(a5) @@ -3307,13 +3307,13 @@ define void @callee_with_irq() "interrupt"="user" { ; RV64I-NEXT: ld a0, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: sw a0, 16(a5) ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a4) ; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a4) ; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a4) ; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV64I-NEXT: sw a0, 
%lo(var_test_irq)(a4) ; RV64I-NEXT: ld ra, 264(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld t0, 256(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld t1, 248(sp) # 8-byte Folded Reload @@ -3396,16 +3396,16 @@ define void @callee_no_irq() { ; RV32IZCMP-NEXT: .cfi_offset s9, -12 ; RV32IZCMP-NEXT: .cfi_offset s10, -8 ; RV32IZCMP-NEXT: .cfi_offset s11, -4 -; RV32IZCMP-NEXT: lui t0, %hi(var_test_irq) -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(t0) +; RV32IZCMP-NEXT: lui a4, %hi(var_test_irq) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq)(a4) ; RV32IZCMP-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(t0) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a4) ; RV32IZCMP-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(t0) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a4) ; RV32IZCMP-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(t0) +; RV32IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a4) ; RV32IZCMP-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32IZCMP-NEXT: addi a5, t0, %lo(var_test_irq) +; RV32IZCMP-NEXT: addi a5, a4, %lo(var_test_irq) ; RV32IZCMP-NEXT: lw a0, 16(a5) ; RV32IZCMP-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; RV32IZCMP-NEXT: lw a0, 20(a5) @@ -3425,28 +3425,28 @@ define void @callee_no_irq() { ; RV32IZCMP-NEXT: lw s11, 72(a5) ; RV32IZCMP-NEXT: lw ra, 76(a5) ; RV32IZCMP-NEXT: lw s1, 80(a5) -; RV32IZCMP-NEXT: lw t3, 84(a5) -; RV32IZCMP-NEXT: lw t2, 88(a5) -; RV32IZCMP-NEXT: lw t1, 92(a5) -; RV32IZCMP-NEXT: lw a7, 112(a5) -; RV32IZCMP-NEXT: lw s0, 116(a5) -; RV32IZCMP-NEXT: lw a3, 120(a5) -; RV32IZCMP-NEXT: lw a0, 124(a5) -; RV32IZCMP-NEXT: lw a6, 96(a5) -; RV32IZCMP-NEXT: lw a4, 100(a5) -; RV32IZCMP-NEXT: lw a2, 104(a5) -; RV32IZCMP-NEXT: lw a1, 108(a5) -; RV32IZCMP-NEXT: sw a0, 124(a5) -; RV32IZCMP-NEXT: sw a3, 120(a5) -; RV32IZCMP-NEXT: sw s0, 116(a5) -; RV32IZCMP-NEXT: sw a7, 112(a5) -; RV32IZCMP-NEXT: sw a1, 108(a5) -; RV32IZCMP-NEXT: sw a2, 104(a5) -; RV32IZCMP-NEXT: sw a4, 100(a5) -; RV32IZCMP-NEXT: sw a6, 96(a5) -; RV32IZCMP-NEXT: sw t1, 92(a5) -; RV32IZCMP-NEXT: sw t2, 88(a5) -; RV32IZCMP-NEXT: sw t3, 84(a5) +; RV32IZCMP-NEXT: lw t2, 84(a5) +; RV32IZCMP-NEXT: lw t1, 88(a5) +; RV32IZCMP-NEXT: lw t0, 92(a5) +; RV32IZCMP-NEXT: lw a7, 96(a5) +; RV32IZCMP-NEXT: lw s0, 100(a5) +; RV32IZCMP-NEXT: lw a6, 104(a5) +; RV32IZCMP-NEXT: lw a3, 108(a5) +; RV32IZCMP-NEXT: lw a2, 112(a5) +; RV32IZCMP-NEXT: lw a1, 116(a5) +; RV32IZCMP-NEXT: lw a0, 120(a5) +; RV32IZCMP-NEXT: lw t3, 124(a5) +; RV32IZCMP-NEXT: sw t3, 124(a5) +; RV32IZCMP-NEXT: sw a0, 120(a5) +; RV32IZCMP-NEXT: sw a1, 116(a5) +; RV32IZCMP-NEXT: sw a2, 112(a5) +; RV32IZCMP-NEXT: sw a3, 108(a5) +; RV32IZCMP-NEXT: sw a6, 104(a5) +; RV32IZCMP-NEXT: sw s0, 100(a5) +; RV32IZCMP-NEXT: sw a7, 96(a5) +; RV32IZCMP-NEXT: sw t0, 92(a5) +; RV32IZCMP-NEXT: sw t1, 88(a5) +; RV32IZCMP-NEXT: sw t2, 84(a5) ; RV32IZCMP-NEXT: sw s1, 80(a5) ; RV32IZCMP-NEXT: sw ra, 76(a5) ; RV32IZCMP-NEXT: sw s11, 72(a5) @@ -3467,13 +3467,13 @@ define void @callee_no_irq() { ; RV32IZCMP-NEXT: lw a0, 8(sp) # 4-byte Folded Reload ; RV32IZCMP-NEXT: sw a0, 16(a5) ; RV32IZCMP-NEXT: lw a0, 12(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(t0) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a4) ; RV32IZCMP-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(t0) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(a4) ; RV32IZCMP-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(t0) 
+; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a4) ; RV32IZCMP-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(t0) +; RV32IZCMP-NEXT: sw a0, %lo(var_test_irq)(a4) ; RV32IZCMP-NEXT: cm.popret {ra, s0-s11}, 80 ; ; RV64IZCMP-LABEL: callee_no_irq: @@ -3493,16 +3493,16 @@ define void @callee_no_irq() { ; RV64IZCMP-NEXT: .cfi_offset s9, -24 ; RV64IZCMP-NEXT: .cfi_offset s10, -16 ; RV64IZCMP-NEXT: .cfi_offset s11, -8 -; RV64IZCMP-NEXT: lui t0, %hi(var_test_irq) -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(t0) +; RV64IZCMP-NEXT: lui a4, %hi(var_test_irq) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq)(a4) ; RV64IZCMP-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(t0) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+4)(a4) ; RV64IZCMP-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(t0) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+8)(a4) ; RV64IZCMP-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(t0) +; RV64IZCMP-NEXT: lw a0, %lo(var_test_irq+12)(a4) ; RV64IZCMP-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IZCMP-NEXT: addi a5, t0, %lo(var_test_irq) +; RV64IZCMP-NEXT: addi a5, a4, %lo(var_test_irq) ; RV64IZCMP-NEXT: lw a0, 16(a5) ; RV64IZCMP-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; RV64IZCMP-NEXT: lw a0, 20(a5) @@ -3522,28 +3522,28 @@ define void @callee_no_irq() { ; RV64IZCMP-NEXT: lw s11, 72(a5) ; RV64IZCMP-NEXT: lw ra, 76(a5) ; RV64IZCMP-NEXT: lw s1, 80(a5) -; RV64IZCMP-NEXT: lw t3, 84(a5) -; RV64IZCMP-NEXT: lw t2, 88(a5) -; RV64IZCMP-NEXT: lw t1, 92(a5) -; RV64IZCMP-NEXT: lw a7, 112(a5) -; RV64IZCMP-NEXT: lw s0, 116(a5) -; RV64IZCMP-NEXT: lw a3, 120(a5) -; RV64IZCMP-NEXT: lw a0, 124(a5) -; RV64IZCMP-NEXT: lw a6, 96(a5) -; RV64IZCMP-NEXT: lw a4, 100(a5) -; RV64IZCMP-NEXT: lw a2, 104(a5) -; RV64IZCMP-NEXT: lw a1, 108(a5) -; RV64IZCMP-NEXT: sw a0, 124(a5) -; RV64IZCMP-NEXT: sw a3, 120(a5) -; RV64IZCMP-NEXT: sw s0, 116(a5) -; RV64IZCMP-NEXT: sw a7, 112(a5) -; RV64IZCMP-NEXT: sw a1, 108(a5) -; RV64IZCMP-NEXT: sw a2, 104(a5) -; RV64IZCMP-NEXT: sw a4, 100(a5) -; RV64IZCMP-NEXT: sw a6, 96(a5) -; RV64IZCMP-NEXT: sw t1, 92(a5) -; RV64IZCMP-NEXT: sw t2, 88(a5) -; RV64IZCMP-NEXT: sw t3, 84(a5) +; RV64IZCMP-NEXT: lw t2, 84(a5) +; RV64IZCMP-NEXT: lw t1, 88(a5) +; RV64IZCMP-NEXT: lw t0, 92(a5) +; RV64IZCMP-NEXT: lw a7, 96(a5) +; RV64IZCMP-NEXT: lw s0, 100(a5) +; RV64IZCMP-NEXT: lw a6, 104(a5) +; RV64IZCMP-NEXT: lw a3, 108(a5) +; RV64IZCMP-NEXT: lw a2, 112(a5) +; RV64IZCMP-NEXT: lw a1, 116(a5) +; RV64IZCMP-NEXT: lw a0, 120(a5) +; RV64IZCMP-NEXT: lw t3, 124(a5) +; RV64IZCMP-NEXT: sw t3, 124(a5) +; RV64IZCMP-NEXT: sw a0, 120(a5) +; RV64IZCMP-NEXT: sw a1, 116(a5) +; RV64IZCMP-NEXT: sw a2, 112(a5) +; RV64IZCMP-NEXT: sw a3, 108(a5) +; RV64IZCMP-NEXT: sw a6, 104(a5) +; RV64IZCMP-NEXT: sw s0, 100(a5) +; RV64IZCMP-NEXT: sw a7, 96(a5) +; RV64IZCMP-NEXT: sw t0, 92(a5) +; RV64IZCMP-NEXT: sw t1, 88(a5) +; RV64IZCMP-NEXT: sw t2, 84(a5) ; RV64IZCMP-NEXT: sw s1, 80(a5) ; RV64IZCMP-NEXT: sw ra, 76(a5) ; RV64IZCMP-NEXT: sw s11, 72(a5) @@ -3564,13 +3564,13 @@ define void @callee_no_irq() { ; RV64IZCMP-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; RV64IZCMP-NEXT: sw a0, 16(a5) ; RV64IZCMP-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(t0) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+12)(a4) ; RV64IZCMP-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+8)(t0) +; RV64IZCMP-NEXT: sw a0, 
%lo(var_test_irq+8)(a4) ; RV64IZCMP-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(t0) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq+4)(a4) ; RV64IZCMP-NEXT: ld a0, 48(sp) # 8-byte Folded Reload -; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(t0) +; RV64IZCMP-NEXT: sw a0, %lo(var_test_irq)(a4) ; RV64IZCMP-NEXT: cm.popret {ra, s0-s11}, 160 ; ; RV32IZCMP-SR-LABEL: callee_no_irq: @@ -3590,16 +3590,16 @@ define void @callee_no_irq() { ; RV32IZCMP-SR-NEXT: .cfi_offset s9, -12 ; RV32IZCMP-SR-NEXT: .cfi_offset s10, -8 ; RV32IZCMP-SR-NEXT: .cfi_offset s11, -4 -; RV32IZCMP-SR-NEXT: lui t0, %hi(var_test_irq) -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(t0) +; RV32IZCMP-SR-NEXT: lui a4, %hi(var_test_irq) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(a4) ; RV32IZCMP-SR-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(t0) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(a4) ; RV32IZCMP-SR-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(t0) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(a4) ; RV32IZCMP-SR-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(t0) +; RV32IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(a4) ; RV32IZCMP-SR-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32IZCMP-SR-NEXT: addi a5, t0, %lo(var_test_irq) +; RV32IZCMP-SR-NEXT: addi a5, a4, %lo(var_test_irq) ; RV32IZCMP-SR-NEXT: lw a0, 16(a5) ; RV32IZCMP-SR-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; RV32IZCMP-SR-NEXT: lw a0, 20(a5) @@ -3619,28 +3619,28 @@ define void @callee_no_irq() { ; RV32IZCMP-SR-NEXT: lw s11, 72(a5) ; RV32IZCMP-SR-NEXT: lw ra, 76(a5) ; RV32IZCMP-SR-NEXT: lw s1, 80(a5) -; RV32IZCMP-SR-NEXT: lw t3, 84(a5) -; RV32IZCMP-SR-NEXT: lw t2, 88(a5) -; RV32IZCMP-SR-NEXT: lw t1, 92(a5) -; RV32IZCMP-SR-NEXT: lw a7, 112(a5) -; RV32IZCMP-SR-NEXT: lw s0, 116(a5) -; RV32IZCMP-SR-NEXT: lw a3, 120(a5) -; RV32IZCMP-SR-NEXT: lw a0, 124(a5) -; RV32IZCMP-SR-NEXT: lw a6, 96(a5) -; RV32IZCMP-SR-NEXT: lw a4, 100(a5) -; RV32IZCMP-SR-NEXT: lw a2, 104(a5) -; RV32IZCMP-SR-NEXT: lw a1, 108(a5) -; RV32IZCMP-SR-NEXT: sw a0, 124(a5) -; RV32IZCMP-SR-NEXT: sw a3, 120(a5) -; RV32IZCMP-SR-NEXT: sw s0, 116(a5) -; RV32IZCMP-SR-NEXT: sw a7, 112(a5) -; RV32IZCMP-SR-NEXT: sw a1, 108(a5) -; RV32IZCMP-SR-NEXT: sw a2, 104(a5) -; RV32IZCMP-SR-NEXT: sw a4, 100(a5) -; RV32IZCMP-SR-NEXT: sw a6, 96(a5) -; RV32IZCMP-SR-NEXT: sw t1, 92(a5) -; RV32IZCMP-SR-NEXT: sw t2, 88(a5) -; RV32IZCMP-SR-NEXT: sw t3, 84(a5) +; RV32IZCMP-SR-NEXT: lw t2, 84(a5) +; RV32IZCMP-SR-NEXT: lw t1, 88(a5) +; RV32IZCMP-SR-NEXT: lw t0, 92(a5) +; RV32IZCMP-SR-NEXT: lw a7, 96(a5) +; RV32IZCMP-SR-NEXT: lw s0, 100(a5) +; RV32IZCMP-SR-NEXT: lw a6, 104(a5) +; RV32IZCMP-SR-NEXT: lw a3, 108(a5) +; RV32IZCMP-SR-NEXT: lw a2, 112(a5) +; RV32IZCMP-SR-NEXT: lw a1, 116(a5) +; RV32IZCMP-SR-NEXT: lw a0, 120(a5) +; RV32IZCMP-SR-NEXT: lw t3, 124(a5) +; RV32IZCMP-SR-NEXT: sw t3, 124(a5) +; RV32IZCMP-SR-NEXT: sw a0, 120(a5) +; RV32IZCMP-SR-NEXT: sw a1, 116(a5) +; RV32IZCMP-SR-NEXT: sw a2, 112(a5) +; RV32IZCMP-SR-NEXT: sw a3, 108(a5) +; RV32IZCMP-SR-NEXT: sw a6, 104(a5) +; RV32IZCMP-SR-NEXT: sw s0, 100(a5) +; RV32IZCMP-SR-NEXT: sw a7, 96(a5) +; RV32IZCMP-SR-NEXT: sw t0, 92(a5) +; RV32IZCMP-SR-NEXT: sw t1, 88(a5) +; RV32IZCMP-SR-NEXT: sw t2, 84(a5) ; RV32IZCMP-SR-NEXT: sw s1, 80(a5) ; RV32IZCMP-SR-NEXT: sw ra, 76(a5) ; RV32IZCMP-SR-NEXT: sw s11, 72(a5) @@ -3661,13 +3661,13 @@ define void @callee_no_irq() { ; RV32IZCMP-SR-NEXT: lw a0, 8(sp) # 4-byte Folded 
Reload ; RV32IZCMP-SR-NEXT: sw a0, 16(a5) ; RV32IZCMP-SR-NEXT: lw a0, 12(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(t0) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(a4) ; RV32IZCMP-SR-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(t0) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(a4) ; RV32IZCMP-SR-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(t0) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(a4) ; RV32IZCMP-SR-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(t0) +; RV32IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(a4) ; RV32IZCMP-SR-NEXT: cm.popret {ra, s0-s11}, 80 ; ; RV64IZCMP-SR-LABEL: callee_no_irq: @@ -3687,16 +3687,16 @@ define void @callee_no_irq() { ; RV64IZCMP-SR-NEXT: .cfi_offset s9, -24 ; RV64IZCMP-SR-NEXT: .cfi_offset s10, -16 ; RV64IZCMP-SR-NEXT: .cfi_offset s11, -8 -; RV64IZCMP-SR-NEXT: lui t0, %hi(var_test_irq) -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(t0) +; RV64IZCMP-SR-NEXT: lui a4, %hi(var_test_irq) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq)(a4) ; RV64IZCMP-SR-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(t0) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+4)(a4) ; RV64IZCMP-SR-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(t0) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+8)(a4) ; RV64IZCMP-SR-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(t0) +; RV64IZCMP-SR-NEXT: lw a0, %lo(var_test_irq+12)(a4) ; RV64IZCMP-SR-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64IZCMP-SR-NEXT: addi a5, t0, %lo(var_test_irq) +; RV64IZCMP-SR-NEXT: addi a5, a4, %lo(var_test_irq) ; RV64IZCMP-SR-NEXT: lw a0, 16(a5) ; RV64IZCMP-SR-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; RV64IZCMP-SR-NEXT: lw a0, 20(a5) @@ -3716,28 +3716,28 @@ define void @callee_no_irq() { ; RV64IZCMP-SR-NEXT: lw s11, 72(a5) ; RV64IZCMP-SR-NEXT: lw ra, 76(a5) ; RV64IZCMP-SR-NEXT: lw s1, 80(a5) -; RV64IZCMP-SR-NEXT: lw t3, 84(a5) -; RV64IZCMP-SR-NEXT: lw t2, 88(a5) -; RV64IZCMP-SR-NEXT: lw t1, 92(a5) -; RV64IZCMP-SR-NEXT: lw a7, 112(a5) -; RV64IZCMP-SR-NEXT: lw s0, 116(a5) -; RV64IZCMP-SR-NEXT: lw a3, 120(a5) -; RV64IZCMP-SR-NEXT: lw a0, 124(a5) -; RV64IZCMP-SR-NEXT: lw a6, 96(a5) -; RV64IZCMP-SR-NEXT: lw a4, 100(a5) -; RV64IZCMP-SR-NEXT: lw a2, 104(a5) -; RV64IZCMP-SR-NEXT: lw a1, 108(a5) -; RV64IZCMP-SR-NEXT: sw a0, 124(a5) -; RV64IZCMP-SR-NEXT: sw a3, 120(a5) -; RV64IZCMP-SR-NEXT: sw s0, 116(a5) -; RV64IZCMP-SR-NEXT: sw a7, 112(a5) -; RV64IZCMP-SR-NEXT: sw a1, 108(a5) -; RV64IZCMP-SR-NEXT: sw a2, 104(a5) -; RV64IZCMP-SR-NEXT: sw a4, 100(a5) -; RV64IZCMP-SR-NEXT: sw a6, 96(a5) -; RV64IZCMP-SR-NEXT: sw t1, 92(a5) -; RV64IZCMP-SR-NEXT: sw t2, 88(a5) -; RV64IZCMP-SR-NEXT: sw t3, 84(a5) +; RV64IZCMP-SR-NEXT: lw t2, 84(a5) +; RV64IZCMP-SR-NEXT: lw t1, 88(a5) +; RV64IZCMP-SR-NEXT: lw t0, 92(a5) +; RV64IZCMP-SR-NEXT: lw a7, 96(a5) +; RV64IZCMP-SR-NEXT: lw s0, 100(a5) +; RV64IZCMP-SR-NEXT: lw a6, 104(a5) +; RV64IZCMP-SR-NEXT: lw a3, 108(a5) +; RV64IZCMP-SR-NEXT: lw a2, 112(a5) +; RV64IZCMP-SR-NEXT: lw a1, 116(a5) +; RV64IZCMP-SR-NEXT: lw a0, 120(a5) +; RV64IZCMP-SR-NEXT: lw t3, 124(a5) +; RV64IZCMP-SR-NEXT: sw t3, 124(a5) +; RV64IZCMP-SR-NEXT: sw a0, 120(a5) +; RV64IZCMP-SR-NEXT: sw a1, 116(a5) +; RV64IZCMP-SR-NEXT: sw a2, 112(a5) +; RV64IZCMP-SR-NEXT: sw a3, 108(a5) +; RV64IZCMP-SR-NEXT: sw a6, 104(a5) +; RV64IZCMP-SR-NEXT: 
sw s0, 100(a5) +; RV64IZCMP-SR-NEXT: sw a7, 96(a5) +; RV64IZCMP-SR-NEXT: sw t0, 92(a5) +; RV64IZCMP-SR-NEXT: sw t1, 88(a5) +; RV64IZCMP-SR-NEXT: sw t2, 84(a5) ; RV64IZCMP-SR-NEXT: sw s1, 80(a5) ; RV64IZCMP-SR-NEXT: sw ra, 76(a5) ; RV64IZCMP-SR-NEXT: sw s11, 72(a5) @@ -3758,13 +3758,13 @@ define void @callee_no_irq() { ; RV64IZCMP-SR-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; RV64IZCMP-SR-NEXT: sw a0, 16(a5) ; RV64IZCMP-SR-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(t0) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+12)(a4) ; RV64IZCMP-SR-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(t0) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+8)(a4) ; RV64IZCMP-SR-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(t0) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq+4)(a4) ; RV64IZCMP-SR-NEXT: ld a0, 48(sp) # 8-byte Folded Reload -; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(t0) +; RV64IZCMP-SR-NEXT: sw a0, %lo(var_test_irq)(a4) ; RV64IZCMP-SR-NEXT: cm.popret {ra, s0-s11}, 160 ; ; RV32I-LABEL: callee_no_irq: @@ -3797,16 +3797,16 @@ define void @callee_no_irq() { ; RV32I-NEXT: .cfi_offset s9, -44 ; RV32I-NEXT: .cfi_offset s10, -48 ; RV32I-NEXT: .cfi_offset s11, -52 -; RV32I-NEXT: lui a7, %hi(var_test_irq) -; RV32I-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV32I-NEXT: lui a4, %hi(var_test_irq) +; RV32I-NEXT: lw a0, %lo(var_test_irq)(a4) ; RV32I-NEXT: sw a0, 24(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV32I-NEXT: lw a0, %lo(var_test_irq+4)(a4) ; RV32I-NEXT: sw a0, 20(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV32I-NEXT: lw a0, %lo(var_test_irq+8)(a4) ; RV32I-NEXT: sw a0, 16(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV32I-NEXT: lw a0, %lo(var_test_irq+12)(a4) ; RV32I-NEXT: sw a0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: addi a5, a7, %lo(var_test_irq) +; RV32I-NEXT: addi a5, a4, %lo(var_test_irq) ; RV32I-NEXT: lw a0, 16(a5) ; RV32I-NEXT: sw a0, 8(sp) # 4-byte Folded Spill ; RV32I-NEXT: lw a0, 20(a5) @@ -3829,22 +3829,22 @@ define void @callee_no_irq() { ; RV32I-NEXT: lw s8, 84(a5) ; RV32I-NEXT: lw s9, 88(a5) ; RV32I-NEXT: lw s10, 92(a5) -; RV32I-NEXT: lw s11, 112(a5) -; RV32I-NEXT: lw ra, 116(a5) -; RV32I-NEXT: lw a3, 120(a5) -; RV32I-NEXT: lw a0, 124(a5) -; RV32I-NEXT: lw a6, 96(a5) -; RV32I-NEXT: lw a4, 100(a5) -; RV32I-NEXT: lw a2, 104(a5) -; RV32I-NEXT: lw a1, 108(a5) -; RV32I-NEXT: sw a0, 124(a5) -; RV32I-NEXT: sw a3, 120(a5) -; RV32I-NEXT: sw ra, 116(a5) -; RV32I-NEXT: sw s11, 112(a5) -; RV32I-NEXT: sw a1, 108(a5) -; RV32I-NEXT: sw a2, 104(a5) -; RV32I-NEXT: sw a4, 100(a5) -; RV32I-NEXT: sw a6, 96(a5) +; RV32I-NEXT: lw s11, 96(a5) +; RV32I-NEXT: lw ra, 100(a5) +; RV32I-NEXT: lw a6, 104(a5) +; RV32I-NEXT: lw a3, 108(a5) +; RV32I-NEXT: lw a2, 112(a5) +; RV32I-NEXT: lw a1, 116(a5) +; RV32I-NEXT: lw a0, 120(a5) +; RV32I-NEXT: lw a7, 124(a5) +; RV32I-NEXT: sw a7, 124(a5) +; RV32I-NEXT: sw a0, 120(a5) +; RV32I-NEXT: sw a1, 116(a5) +; RV32I-NEXT: sw a2, 112(a5) +; RV32I-NEXT: sw a3, 108(a5) +; RV32I-NEXT: sw a6, 104(a5) +; RV32I-NEXT: sw ra, 100(a5) +; RV32I-NEXT: sw s11, 96(a5) ; RV32I-NEXT: sw s10, 92(a5) ; RV32I-NEXT: sw s9, 88(a5) ; RV32I-NEXT: sw s8, 84(a5) @@ -3868,13 +3868,13 @@ define void @callee_no_irq() { ; RV32I-NEXT: lw a0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: sw a0, 16(a5) ; RV32I-NEXT: lw a0, 12(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, 
%lo(var_test_irq+12)(a7) +; RV32I-NEXT: sw a0, %lo(var_test_irq+12)(a4) ; RV32I-NEXT: lw a0, 16(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV32I-NEXT: sw a0, %lo(var_test_irq+8)(a4) ; RV32I-NEXT: lw a0, 20(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV32I-NEXT: sw a0, %lo(var_test_irq+4)(a4) ; RV32I-NEXT: lw a0, 24(sp) # 4-byte Folded Reload -; RV32I-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV32I-NEXT: sw a0, %lo(var_test_irq)(a4) ; RV32I-NEXT: lw ra, 76(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 72(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 68(sp) # 4-byte Folded Reload @@ -3935,16 +3935,16 @@ define void @callee_no_irq() { ; RV64I-NEXT: .cfi_offset s9, -88 ; RV64I-NEXT: .cfi_offset s10, -96 ; RV64I-NEXT: .cfi_offset s11, -104 -; RV64I-NEXT: lui a7, %hi(var_test_irq) -; RV64I-NEXT: lw a0, %lo(var_test_irq)(a7) +; RV64I-NEXT: lui a4, %hi(var_test_irq) +; RV64I-NEXT: lw a0, %lo(var_test_irq)(a4) ; RV64I-NEXT: sd a0, 48(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a7) +; RV64I-NEXT: lw a0, %lo(var_test_irq+4)(a4) ; RV64I-NEXT: sd a0, 40(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a7) +; RV64I-NEXT: lw a0, %lo(var_test_irq+8)(a4) ; RV64I-NEXT: sd a0, 32(sp) # 8-byte Folded Spill -; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a7) +; RV64I-NEXT: lw a0, %lo(var_test_irq+12)(a4) ; RV64I-NEXT: sd a0, 24(sp) # 8-byte Folded Spill -; RV64I-NEXT: addi a5, a7, %lo(var_test_irq) +; RV64I-NEXT: addi a5, a4, %lo(var_test_irq) ; RV64I-NEXT: lw a0, 16(a5) ; RV64I-NEXT: sd a0, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: lw a0, 20(a5) @@ -3967,22 +3967,22 @@ define void @callee_no_irq() { ; RV64I-NEXT: lw s8, 84(a5) ; RV64I-NEXT: lw s9, 88(a5) ; RV64I-NEXT: lw s10, 92(a5) -; RV64I-NEXT: lw s11, 112(a5) -; RV64I-NEXT: lw ra, 116(a5) -; RV64I-NEXT: lw a3, 120(a5) -; RV64I-NEXT: lw a0, 124(a5) -; RV64I-NEXT: lw a6, 96(a5) -; RV64I-NEXT: lw a4, 100(a5) -; RV64I-NEXT: lw a2, 104(a5) -; RV64I-NEXT: lw a1, 108(a5) -; RV64I-NEXT: sw a0, 124(a5) -; RV64I-NEXT: sw a3, 120(a5) -; RV64I-NEXT: sw ra, 116(a5) -; RV64I-NEXT: sw s11, 112(a5) -; RV64I-NEXT: sw a1, 108(a5) -; RV64I-NEXT: sw a2, 104(a5) -; RV64I-NEXT: sw a4, 100(a5) -; RV64I-NEXT: sw a6, 96(a5) +; RV64I-NEXT: lw s11, 96(a5) +; RV64I-NEXT: lw ra, 100(a5) +; RV64I-NEXT: lw a6, 104(a5) +; RV64I-NEXT: lw a3, 108(a5) +; RV64I-NEXT: lw a2, 112(a5) +; RV64I-NEXT: lw a1, 116(a5) +; RV64I-NEXT: lw a0, 120(a5) +; RV64I-NEXT: lw a7, 124(a5) +; RV64I-NEXT: sw a7, 124(a5) +; RV64I-NEXT: sw a0, 120(a5) +; RV64I-NEXT: sw a1, 116(a5) +; RV64I-NEXT: sw a2, 112(a5) +; RV64I-NEXT: sw a3, 108(a5) +; RV64I-NEXT: sw a6, 104(a5) +; RV64I-NEXT: sw ra, 100(a5) +; RV64I-NEXT: sw s11, 96(a5) ; RV64I-NEXT: sw s10, 92(a5) ; RV64I-NEXT: sw s9, 88(a5) ; RV64I-NEXT: sw s8, 84(a5) @@ -4006,13 +4006,13 @@ define void @callee_no_irq() { ; RV64I-NEXT: ld a0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: sw a0, 16(a5) ; RV64I-NEXT: ld a0, 24(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a7) +; RV64I-NEXT: sw a0, %lo(var_test_irq+12)(a4) ; RV64I-NEXT: ld a0, 32(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a7) +; RV64I-NEXT: sw a0, %lo(var_test_irq+8)(a4) ; RV64I-NEXT: ld a0, 40(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a7) +; RV64I-NEXT: sw a0, %lo(var_test_irq+4)(a4) ; RV64I-NEXT: ld a0, 48(sp) # 8-byte Folded Reload -; RV64I-NEXT: sw a0, %lo(var_test_irq)(a7) +; RV64I-NEXT: sw a0, %lo(var_test_irq)(a4) ; 
RV64I-NEXT: ld ra, 152(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 144(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 136(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll index 32261ee47164e..c53e6dc3b8089 100644 --- a/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll +++ b/llvm/test/CodeGen/RISCV/riscv-codegenprepare-asm.ll @@ -50,8 +50,8 @@ define void @test2(ptr nocapture noundef %a, i32 noundef signext %n) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: blez a1, .LBB1_7 ; CHECK-NEXT: # %bb.1: # %for.body.preheader -; CHECK-NEXT: li a3, 1 ; CHECK-NEXT: andi a2, a1, 1 +; CHECK-NEXT: li a3, 1 ; CHECK-NEXT: bne a1, a3, .LBB1_3 ; CHECK-NEXT: # %bb.2: ; CHECK-NEXT: li a3, 0 diff --git a/llvm/test/CodeGen/RISCV/rotl-rotr.ll b/llvm/test/CodeGen/RISCV/rotl-rotr.ll index 634cca5dcdb71..5522e3c9a0fb9 100644 --- a/llvm/test/CodeGen/RISCV/rotl-rotr.ll +++ b/llvm/test/CodeGen/RISCV/rotl-rotr.ll @@ -119,8 +119,8 @@ define i32 @rotr_32(i32 %x, i32 %y) nounwind { define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotl_64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a5, a2, -32 ; RV32I-NEXT: sll a4, a0, a2 +; RV32I-NEXT: addi a5, a2, -32 ; RV32I-NEXT: bltz a5, .LBB2_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a3, a4 @@ -167,8 +167,8 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; ; RV32ZBB-LABEL: rotl_64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: addi a5, a2, -32 ; RV32ZBB-NEXT: sll a4, a0, a2 +; RV32ZBB-NEXT: addi a5, a2, -32 ; RV32ZBB-NEXT: bltz a5, .LBB2_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: mv a3, a4 @@ -212,8 +212,8 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { ; ; RV32XTHEADBB-LABEL: rotl_64: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: addi a5, a2, -32 ; RV32XTHEADBB-NEXT: sll a4, a0, a2 +; RV32XTHEADBB-NEXT: addi a5, a2, -32 ; RV32XTHEADBB-NEXT: bltz a5, .LBB2_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: mv a3, a4 @@ -267,8 +267,8 @@ define i64 @rotl_64(i64 %x, i64 %y) nounwind { define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotr_64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a5, a2, -32 ; RV32I-NEXT: srl a4, a1, a2 +; RV32I-NEXT: addi a5, a2, -32 ; RV32I-NEXT: bltz a5, .LBB3_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a3, a4 @@ -315,8 +315,8 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; ; RV32ZBB-LABEL: rotr_64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: addi a5, a2, -32 ; RV32ZBB-NEXT: srl a4, a1, a2 +; RV32ZBB-NEXT: addi a5, a2, -32 ; RV32ZBB-NEXT: bltz a5, .LBB3_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: mv a3, a4 @@ -360,8 +360,8 @@ define i64 @rotr_64(i64 %x, i64 %y) nounwind { ; ; RV32XTHEADBB-LABEL: rotr_64: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: addi a5, a2, -32 ; RV32XTHEADBB-NEXT: srl a4, a1, a2 +; RV32XTHEADBB-NEXT: addi a5, a2, -32 ; RV32XTHEADBB-NEXT: bltz a5, .LBB3_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: mv a3, a4 @@ -707,8 +707,8 @@ define i32 @rotr_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind { define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotl_64_mask: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a5, a2, -32 ; RV32I-NEXT: sll a4, a0, a2 +; RV32I-NEXT: addi a5, a2, -32 ; RV32I-NEXT: bltz a5, .LBB10_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a3, a4 @@ -720,24 +720,24 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: srl a6, a7, a6 ; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: .LBB10_3: -; RV32I-NEXT: srai t0, a5, 31 +; RV32I-NEXT: srai a6, a5, 31 ; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: andi a7, a5, 63 -; 
RV32I-NEXT: addi a6, a7, -32 -; RV32I-NEXT: and a2, t0, a4 -; RV32I-NEXT: bltz a6, .LBB10_5 +; RV32I-NEXT: and a2, a6, a4 +; RV32I-NEXT: andi a6, a5, 63 +; RV32I-NEXT: addi a4, a6, -32 +; RV32I-NEXT: bltz a4, .LBB10_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a0, a1, a7 +; RV32I-NEXT: srl a0, a1, a6 ; RV32I-NEXT: j .LBB10_6 ; RV32I-NEXT: .LBB10_5: ; RV32I-NEXT: srl a0, a0, a5 -; RV32I-NEXT: not a4, a7 +; RV32I-NEXT: not a6, a6 ; RV32I-NEXT: slli a7, a1, 1 -; RV32I-NEXT: sll a4, a7, a4 -; RV32I-NEXT: or a0, a0, a4 +; RV32I-NEXT: sll a6, a7, a6 +; RV32I-NEXT: or a0, a0, a6 ; RV32I-NEXT: .LBB10_6: ; RV32I-NEXT: srl a1, a1, a5 -; RV32I-NEXT: srai a4, a6, 31 +; RV32I-NEXT: srai a4, a4, 31 ; RV32I-NEXT: and a1, a4, a1 ; RV32I-NEXT: or a1, a3, a1 ; RV32I-NEXT: or a0, a2, a0 @@ -753,8 +753,8 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; ; RV32ZBB-LABEL: rotl_64_mask: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: addi a5, a2, -32 ; RV32ZBB-NEXT: sll a4, a0, a2 +; RV32ZBB-NEXT: addi a5, a2, -32 ; RV32ZBB-NEXT: bltz a5, .LBB10_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: mv a3, a4 @@ -766,24 +766,24 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: srl a6, a7, a6 ; RV32ZBB-NEXT: or a3, a3, a6 ; RV32ZBB-NEXT: .LBB10_3: -; RV32ZBB-NEXT: srai t0, a5, 31 +; RV32ZBB-NEXT: srai a6, a5, 31 ; RV32ZBB-NEXT: neg a5, a2 -; RV32ZBB-NEXT: andi a7, a5, 63 -; RV32ZBB-NEXT: addi a6, a7, -32 -; RV32ZBB-NEXT: and a2, t0, a4 -; RV32ZBB-NEXT: bltz a6, .LBB10_5 +; RV32ZBB-NEXT: and a2, a6, a4 +; RV32ZBB-NEXT: andi a6, a5, 63 +; RV32ZBB-NEXT: addi a4, a6, -32 +; RV32ZBB-NEXT: bltz a4, .LBB10_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: srl a0, a1, a7 +; RV32ZBB-NEXT: srl a0, a1, a6 ; RV32ZBB-NEXT: j .LBB10_6 ; RV32ZBB-NEXT: .LBB10_5: ; RV32ZBB-NEXT: srl a0, a0, a5 -; RV32ZBB-NEXT: not a4, a7 +; RV32ZBB-NEXT: not a6, a6 ; RV32ZBB-NEXT: slli a7, a1, 1 -; RV32ZBB-NEXT: sll a4, a7, a4 -; RV32ZBB-NEXT: or a0, a0, a4 +; RV32ZBB-NEXT: sll a6, a7, a6 +; RV32ZBB-NEXT: or a0, a0, a6 ; RV32ZBB-NEXT: .LBB10_6: ; RV32ZBB-NEXT: srl a1, a1, a5 -; RV32ZBB-NEXT: srai a4, a6, 31 +; RV32ZBB-NEXT: srai a4, a4, 31 ; RV32ZBB-NEXT: and a1, a4, a1 ; RV32ZBB-NEXT: or a1, a3, a1 ; RV32ZBB-NEXT: or a0, a2, a0 @@ -796,8 +796,8 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; ; RV32XTHEADBB-LABEL: rotl_64_mask: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: addi a5, a2, -32 ; RV32XTHEADBB-NEXT: sll a4, a0, a2 +; RV32XTHEADBB-NEXT: addi a5, a2, -32 ; RV32XTHEADBB-NEXT: bltz a5, .LBB10_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: mv a3, a4 @@ -809,24 +809,24 @@ define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: srl a6, a7, a6 ; RV32XTHEADBB-NEXT: or a3, a3, a6 ; RV32XTHEADBB-NEXT: .LBB10_3: -; RV32XTHEADBB-NEXT: srai t0, a5, 31 +; RV32XTHEADBB-NEXT: srai a6, a5, 31 ; RV32XTHEADBB-NEXT: neg a5, a2 -; RV32XTHEADBB-NEXT: andi a7, a5, 63 -; RV32XTHEADBB-NEXT: addi a6, a7, -32 -; RV32XTHEADBB-NEXT: and a2, t0, a4 -; RV32XTHEADBB-NEXT: bltz a6, .LBB10_5 +; RV32XTHEADBB-NEXT: and a2, a6, a4 +; RV32XTHEADBB-NEXT: andi a6, a5, 63 +; RV32XTHEADBB-NEXT: addi a4, a6, -32 +; RV32XTHEADBB-NEXT: bltz a4, .LBB10_5 ; RV32XTHEADBB-NEXT: # %bb.4: -; RV32XTHEADBB-NEXT: srl a0, a1, a7 +; RV32XTHEADBB-NEXT: srl a0, a1, a6 ; RV32XTHEADBB-NEXT: j .LBB10_6 ; RV32XTHEADBB-NEXT: .LBB10_5: ; RV32XTHEADBB-NEXT: srl a0, a0, a5 -; RV32XTHEADBB-NEXT: not a4, a7 +; RV32XTHEADBB-NEXT: not a6, a6 ; RV32XTHEADBB-NEXT: slli a7, a1, 1 -; RV32XTHEADBB-NEXT: sll a4, a7, a4 -; RV32XTHEADBB-NEXT: or a0, a0, a4 +; RV32XTHEADBB-NEXT: sll 
a6, a7, a6 +; RV32XTHEADBB-NEXT: or a0, a0, a6 ; RV32XTHEADBB-NEXT: .LBB10_6: ; RV32XTHEADBB-NEXT: srl a1, a1, a5 -; RV32XTHEADBB-NEXT: srai a4, a6, 31 +; RV32XTHEADBB-NEXT: srai a4, a4, 31 ; RV32XTHEADBB-NEXT: and a1, a4, a1 ; RV32XTHEADBB-NEXT: or a1, a3, a1 ; RV32XTHEADBB-NEXT: or a0, a2, a0 @@ -863,12 +863,12 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: srl a3, a6, a3 ; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: .LBB11_3: -; RV32I-NEXT: sll a7, a0, a2 -; RV32I-NEXT: srai t0, a4, 31 +; RV32I-NEXT: sll a5, a0, a2 +; RV32I-NEXT: srai a6, a4, 31 ; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: and a2, a6, a5 ; RV32I-NEXT: andi a6, a4, 63 ; RV32I-NEXT: addi a5, a6, -32 -; RV32I-NEXT: and a2, t0, a7 ; RV32I-NEXT: bltz a5, .LBB11_5 ; RV32I-NEXT: # %bb.4: ; RV32I-NEXT: srl a0, a1, a6 @@ -910,12 +910,12 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: srl a3, a6, a3 ; RV32ZBB-NEXT: or a3, a5, a3 ; RV32ZBB-NEXT: .LBB11_3: -; RV32ZBB-NEXT: sll a7, a0, a2 -; RV32ZBB-NEXT: srai t0, a4, 31 +; RV32ZBB-NEXT: sll a5, a0, a2 +; RV32ZBB-NEXT: srai a6, a4, 31 ; RV32ZBB-NEXT: neg a4, a2 +; RV32ZBB-NEXT: and a2, a6, a5 ; RV32ZBB-NEXT: andi a6, a4, 63 ; RV32ZBB-NEXT: addi a5, a6, -32 -; RV32ZBB-NEXT: and a2, t0, a7 ; RV32ZBB-NEXT: bltz a5, .LBB11_5 ; RV32ZBB-NEXT: # %bb.4: ; RV32ZBB-NEXT: srl a0, a1, a6 @@ -954,12 +954,12 @@ define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: srl a3, a6, a3 ; RV32XTHEADBB-NEXT: or a3, a5, a3 ; RV32XTHEADBB-NEXT: .LBB11_3: -; RV32XTHEADBB-NEXT: sll a7, a0, a2 -; RV32XTHEADBB-NEXT: srai t0, a4, 31 +; RV32XTHEADBB-NEXT: sll a5, a0, a2 +; RV32XTHEADBB-NEXT: srai a6, a4, 31 ; RV32XTHEADBB-NEXT: neg a4, a2 +; RV32XTHEADBB-NEXT: and a2, a6, a5 ; RV32XTHEADBB-NEXT: andi a6, a4, 63 ; RV32XTHEADBB-NEXT: addi a5, a6, -32 -; RV32XTHEADBB-NEXT: and a2, t0, a7 ; RV32XTHEADBB-NEXT: bltz a5, .LBB11_5 ; RV32XTHEADBB-NEXT: # %bb.4: ; RV32XTHEADBB-NEXT: srl a0, a1, a6 @@ -1042,8 +1042,8 @@ define i64 @rotl_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind { define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; RV32I-LABEL: rotr_64_mask: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a5, a2, -32 ; RV32I-NEXT: srl a4, a1, a2 +; RV32I-NEXT: addi a5, a2, -32 ; RV32I-NEXT: bltz a5, .LBB13_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a3, a4 @@ -1055,24 +1055,24 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: sll a6, a7, a6 ; RV32I-NEXT: or a3, a3, a6 ; RV32I-NEXT: .LBB13_3: -; RV32I-NEXT: srai t0, a5, 31 +; RV32I-NEXT: srai a6, a5, 31 ; RV32I-NEXT: neg a5, a2 -; RV32I-NEXT: andi a7, a5, 63 -; RV32I-NEXT: addi a6, a7, -32 -; RV32I-NEXT: and a2, t0, a4 -; RV32I-NEXT: bltz a6, .LBB13_5 +; RV32I-NEXT: and a2, a6, a4 +; RV32I-NEXT: andi a6, a5, 63 +; RV32I-NEXT: addi a4, a6, -32 +; RV32I-NEXT: bltz a4, .LBB13_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a1, a0, a7 +; RV32I-NEXT: sll a1, a0, a6 ; RV32I-NEXT: j .LBB13_6 ; RV32I-NEXT: .LBB13_5: ; RV32I-NEXT: sll a1, a1, a5 -; RV32I-NEXT: not a4, a7 +; RV32I-NEXT: not a6, a6 ; RV32I-NEXT: srli a7, a0, 1 -; RV32I-NEXT: srl a4, a7, a4 -; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: srl a6, a7, a6 +; RV32I-NEXT: or a1, a1, a6 ; RV32I-NEXT: .LBB13_6: ; RV32I-NEXT: sll a0, a0, a5 -; RV32I-NEXT: srai a4, a6, 31 +; RV32I-NEXT: srai a4, a4, 31 ; RV32I-NEXT: and a0, a4, a0 ; RV32I-NEXT: or a0, a3, a0 ; RV32I-NEXT: or a1, a2, a1 @@ -1088,8 +1088,8 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; ; RV32ZBB-LABEL: rotr_64_mask: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: 
addi a5, a2, -32 ; RV32ZBB-NEXT: srl a4, a1, a2 +; RV32ZBB-NEXT: addi a5, a2, -32 ; RV32ZBB-NEXT: bltz a5, .LBB13_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: mv a3, a4 @@ -1101,24 +1101,24 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; RV32ZBB-NEXT: sll a6, a7, a6 ; RV32ZBB-NEXT: or a3, a3, a6 ; RV32ZBB-NEXT: .LBB13_3: -; RV32ZBB-NEXT: srai t0, a5, 31 +; RV32ZBB-NEXT: srai a6, a5, 31 ; RV32ZBB-NEXT: neg a5, a2 -; RV32ZBB-NEXT: andi a7, a5, 63 -; RV32ZBB-NEXT: addi a6, a7, -32 -; RV32ZBB-NEXT: and a2, t0, a4 -; RV32ZBB-NEXT: bltz a6, .LBB13_5 +; RV32ZBB-NEXT: and a2, a6, a4 +; RV32ZBB-NEXT: andi a6, a5, 63 +; RV32ZBB-NEXT: addi a4, a6, -32 +; RV32ZBB-NEXT: bltz a4, .LBB13_5 ; RV32ZBB-NEXT: # %bb.4: -; RV32ZBB-NEXT: sll a1, a0, a7 +; RV32ZBB-NEXT: sll a1, a0, a6 ; RV32ZBB-NEXT: j .LBB13_6 ; RV32ZBB-NEXT: .LBB13_5: ; RV32ZBB-NEXT: sll a1, a1, a5 -; RV32ZBB-NEXT: not a4, a7 +; RV32ZBB-NEXT: not a6, a6 ; RV32ZBB-NEXT: srli a7, a0, 1 -; RV32ZBB-NEXT: srl a4, a7, a4 -; RV32ZBB-NEXT: or a1, a1, a4 +; RV32ZBB-NEXT: srl a6, a7, a6 +; RV32ZBB-NEXT: or a1, a1, a6 ; RV32ZBB-NEXT: .LBB13_6: ; RV32ZBB-NEXT: sll a0, a0, a5 -; RV32ZBB-NEXT: srai a4, a6, 31 +; RV32ZBB-NEXT: srai a4, a4, 31 ; RV32ZBB-NEXT: and a0, a4, a0 ; RV32ZBB-NEXT: or a0, a3, a0 ; RV32ZBB-NEXT: or a1, a2, a1 @@ -1131,8 +1131,8 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; ; RV32XTHEADBB-LABEL: rotr_64_mask: ; RV32XTHEADBB: # %bb.0: -; RV32XTHEADBB-NEXT: addi a5, a2, -32 ; RV32XTHEADBB-NEXT: srl a4, a1, a2 +; RV32XTHEADBB-NEXT: addi a5, a2, -32 ; RV32XTHEADBB-NEXT: bltz a5, .LBB13_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: mv a3, a4 @@ -1144,24 +1144,24 @@ define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: sll a6, a7, a6 ; RV32XTHEADBB-NEXT: or a3, a3, a6 ; RV32XTHEADBB-NEXT: .LBB13_3: -; RV32XTHEADBB-NEXT: srai t0, a5, 31 +; RV32XTHEADBB-NEXT: srai a6, a5, 31 ; RV32XTHEADBB-NEXT: neg a5, a2 -; RV32XTHEADBB-NEXT: andi a7, a5, 63 -; RV32XTHEADBB-NEXT: addi a6, a7, -32 -; RV32XTHEADBB-NEXT: and a2, t0, a4 -; RV32XTHEADBB-NEXT: bltz a6, .LBB13_5 +; RV32XTHEADBB-NEXT: and a2, a6, a4 +; RV32XTHEADBB-NEXT: andi a6, a5, 63 +; RV32XTHEADBB-NEXT: addi a4, a6, -32 +; RV32XTHEADBB-NEXT: bltz a4, .LBB13_5 ; RV32XTHEADBB-NEXT: # %bb.4: -; RV32XTHEADBB-NEXT: sll a1, a0, a7 +; RV32XTHEADBB-NEXT: sll a1, a0, a6 ; RV32XTHEADBB-NEXT: j .LBB13_6 ; RV32XTHEADBB-NEXT: .LBB13_5: ; RV32XTHEADBB-NEXT: sll a1, a1, a5 -; RV32XTHEADBB-NEXT: not a4, a7 +; RV32XTHEADBB-NEXT: not a6, a6 ; RV32XTHEADBB-NEXT: srli a7, a0, 1 -; RV32XTHEADBB-NEXT: srl a4, a7, a4 -; RV32XTHEADBB-NEXT: or a1, a1, a4 +; RV32XTHEADBB-NEXT: srl a6, a7, a6 +; RV32XTHEADBB-NEXT: or a1, a1, a6 ; RV32XTHEADBB-NEXT: .LBB13_6: ; RV32XTHEADBB-NEXT: sll a0, a0, a5 -; RV32XTHEADBB-NEXT: srai a4, a6, 31 +; RV32XTHEADBB-NEXT: srai a4, a4, 31 ; RV32XTHEADBB-NEXT: and a0, a4, a0 ; RV32XTHEADBB-NEXT: or a0, a3, a0 ; RV32XTHEADBB-NEXT: or a1, a2, a1 @@ -1198,12 +1198,12 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32I-NEXT: sll a3, a6, a3 ; RV32I-NEXT: or a3, a5, a3 ; RV32I-NEXT: .LBB14_3: -; RV32I-NEXT: srl a7, a1, a2 -; RV32I-NEXT: srai t0, a4, 31 +; RV32I-NEXT: srl a5, a1, a2 +; RV32I-NEXT: srai a6, a4, 31 ; RV32I-NEXT: neg a4, a2 +; RV32I-NEXT: and a2, a6, a5 ; RV32I-NEXT: andi a6, a4, 63 ; RV32I-NEXT: addi a5, a6, -32 -; RV32I-NEXT: and a2, t0, a7 ; RV32I-NEXT: bltz a5, .LBB14_5 ; RV32I-NEXT: # %bb.4: ; RV32I-NEXT: sll a1, a0, a6 @@ -1245,12 +1245,12 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; 
RV32ZBB-NEXT: sll a3, a6, a3 ; RV32ZBB-NEXT: or a3, a5, a3 ; RV32ZBB-NEXT: .LBB14_3: -; RV32ZBB-NEXT: srl a7, a1, a2 -; RV32ZBB-NEXT: srai t0, a4, 31 +; RV32ZBB-NEXT: srl a5, a1, a2 +; RV32ZBB-NEXT: srai a6, a4, 31 ; RV32ZBB-NEXT: neg a4, a2 +; RV32ZBB-NEXT: and a2, a6, a5 ; RV32ZBB-NEXT: andi a6, a4, 63 ; RV32ZBB-NEXT: addi a5, a6, -32 -; RV32ZBB-NEXT: and a2, t0, a7 ; RV32ZBB-NEXT: bltz a5, .LBB14_5 ; RV32ZBB-NEXT: # %bb.4: ; RV32ZBB-NEXT: sll a1, a0, a6 @@ -1289,12 +1289,12 @@ define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind { ; RV32XTHEADBB-NEXT: sll a3, a6, a3 ; RV32XTHEADBB-NEXT: or a3, a5, a3 ; RV32XTHEADBB-NEXT: .LBB14_3: -; RV32XTHEADBB-NEXT: srl a7, a1, a2 -; RV32XTHEADBB-NEXT: srai t0, a4, 31 +; RV32XTHEADBB-NEXT: srl a5, a1, a2 +; RV32XTHEADBB-NEXT: srai a6, a4, 31 ; RV32XTHEADBB-NEXT: neg a4, a2 +; RV32XTHEADBB-NEXT: and a2, a6, a5 ; RV32XTHEADBB-NEXT: andi a6, a4, 63 ; RV32XTHEADBB-NEXT: addi a5, a6, -32 -; RV32XTHEADBB-NEXT: and a2, t0, a7 ; RV32XTHEADBB-NEXT: bltz a5, .LBB14_5 ; RV32XTHEADBB-NEXT: # %bb.4: ; RV32XTHEADBB-NEXT: sll a1, a0, a6 @@ -1458,11 +1458,11 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32I-NEXT: not t0, a4 ; RV32I-NEXT: sll t1, a1, a4 ; RV32I-NEXT: srli a1, a6, 1 -; RV32I-NEXT: srl a6, a0, t0 -; RV32I-NEXT: srl t0, a1, t0 +; RV32I-NEXT: srl a0, a0, t0 +; RV32I-NEXT: srl a6, a1, t0 +; RV32I-NEXT: or a1, a7, a0 +; RV32I-NEXT: or a6, t1, a6 ; RV32I-NEXT: addi a0, a5, -32 -; RV32I-NEXT: or a1, a7, a6 -; RV32I-NEXT: or a6, t1, t0 ; RV32I-NEXT: bltz a0, .LBB17_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: sll a3, a2, a5 @@ -1512,11 +1512,11 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32ZBB-NEXT: not t0, a4 ; RV32ZBB-NEXT: sll t1, a1, a4 ; RV32ZBB-NEXT: srli a1, a6, 1 -; RV32ZBB-NEXT: srl a6, a0, t0 -; RV32ZBB-NEXT: srl t0, a1, t0 +; RV32ZBB-NEXT: srl a0, a0, t0 +; RV32ZBB-NEXT: srl a6, a1, t0 +; RV32ZBB-NEXT: or a1, a7, a0 +; RV32ZBB-NEXT: or a6, t1, a6 ; RV32ZBB-NEXT: addi a0, a5, -32 -; RV32ZBB-NEXT: or a1, a7, a6 -; RV32ZBB-NEXT: or a6, t1, t0 ; RV32ZBB-NEXT: bltz a0, .LBB17_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: sll a3, a2, a5 @@ -1562,11 +1562,11 @@ define signext i64 @rotl_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32XTHEADBB-NEXT: not t0, a4 ; RV32XTHEADBB-NEXT: sll t1, a1, a4 ; RV32XTHEADBB-NEXT: srli a1, a6, 1 -; RV32XTHEADBB-NEXT: srl a6, a0, t0 -; RV32XTHEADBB-NEXT: srl t0, a1, t0 +; RV32XTHEADBB-NEXT: srl a0, a0, t0 +; RV32XTHEADBB-NEXT: srl a6, a1, t0 +; RV32XTHEADBB-NEXT: or a1, a7, a0 +; RV32XTHEADBB-NEXT: or a6, t1, a6 ; RV32XTHEADBB-NEXT: addi a0, a5, -32 -; RV32XTHEADBB-NEXT: or a1, a7, a6 -; RV32XTHEADBB-NEXT: or a6, t1, t0 ; RV32XTHEADBB-NEXT: bltz a0, .LBB17_6 ; RV32XTHEADBB-NEXT: # %bb.5: ; RV32XTHEADBB-NEXT: sll a3, a2, a5 @@ -1683,13 +1683,13 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32I-NEXT: .LBB19_4: ; RV32I-NEXT: slli a1, a0, 1 ; RV32I-NEXT: not t0, a4 -; RV32I-NEXT: srl t1, a0, a4 +; RV32I-NEXT: srl a0, a0, a4 ; RV32I-NEXT: slli a6, a6, 1 ; RV32I-NEXT: sll a1, a1, t0 ; RV32I-NEXT: sll a6, a6, t0 -; RV32I-NEXT: addi a0, a5, -32 ; RV32I-NEXT: or a1, a1, a7 -; RV32I-NEXT: or a6, a6, t1 +; RV32I-NEXT: or a6, a6, a0 +; RV32I-NEXT: addi a0, a5, -32 ; RV32I-NEXT: bltz a0, .LBB19_6 ; RV32I-NEXT: # %bb.5: ; RV32I-NEXT: sll a3, a2, a5 @@ -1736,13 +1736,13 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32ZBB-NEXT: .LBB19_4: ; RV32ZBB-NEXT: 
slli a1, a0, 1 ; RV32ZBB-NEXT: not t0, a4 -; RV32ZBB-NEXT: srl t1, a0, a4 +; RV32ZBB-NEXT: srl a0, a0, a4 ; RV32ZBB-NEXT: slli a6, a6, 1 ; RV32ZBB-NEXT: sll a1, a1, t0 ; RV32ZBB-NEXT: sll a6, a6, t0 -; RV32ZBB-NEXT: addi a0, a5, -32 ; RV32ZBB-NEXT: or a1, a1, a7 -; RV32ZBB-NEXT: or a6, a6, t1 +; RV32ZBB-NEXT: or a6, a6, a0 +; RV32ZBB-NEXT: addi a0, a5, -32 ; RV32ZBB-NEXT: bltz a0, .LBB19_6 ; RV32ZBB-NEXT: # %bb.5: ; RV32ZBB-NEXT: sll a3, a2, a5 @@ -1786,13 +1786,13 @@ define signext i64 @rotr_64_mask_shared(i64 signext %a, i64 signext %b, i64 sign ; RV32XTHEADBB-NEXT: .LBB19_4: ; RV32XTHEADBB-NEXT: slli a1, a0, 1 ; RV32XTHEADBB-NEXT: not t0, a4 -; RV32XTHEADBB-NEXT: srl t1, a0, a4 +; RV32XTHEADBB-NEXT: srl a0, a0, a4 ; RV32XTHEADBB-NEXT: slli a6, a6, 1 ; RV32XTHEADBB-NEXT: sll a1, a1, t0 ; RV32XTHEADBB-NEXT: sll a6, a6, t0 -; RV32XTHEADBB-NEXT: addi a0, a5, -32 ; RV32XTHEADBB-NEXT: or a1, a1, a7 -; RV32XTHEADBB-NEXT: or a6, a6, t1 +; RV32XTHEADBB-NEXT: or a6, a6, a0 +; RV32XTHEADBB-NEXT: addi a0, a5, -32 ; RV32XTHEADBB-NEXT: bltz a0, .LBB19_6 ; RV32XTHEADBB-NEXT: # %bb.5: ; RV32XTHEADBB-NEXT: sll a3, a2, a5 @@ -2314,8 +2314,8 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { ; RV32I-LABEL: rotl_64_zext: ; RV32I: # %bb.0: ; RV32I-NEXT: neg a4, a2 -; RV32I-NEXT: addi a6, a2, -32 ; RV32I-NEXT: sll a5, a0, a2 +; RV32I-NEXT: addi a6, a2, -32 ; RV32I-NEXT: bltz a6, .LBB24_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a3, a5 @@ -2362,8 +2362,8 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { ; RV32ZBB-LABEL: rotl_64_zext: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: neg a4, a2 -; RV32ZBB-NEXT: addi a6, a2, -32 ; RV32ZBB-NEXT: sll a5, a0, a2 +; RV32ZBB-NEXT: addi a6, a2, -32 ; RV32ZBB-NEXT: bltz a6, .LBB24_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: mv a3, a5 @@ -2407,8 +2407,8 @@ define i64 @rotl_64_zext(i64 %x, i32 %y) nounwind { ; RV32XTHEADBB-LABEL: rotl_64_zext: ; RV32XTHEADBB: # %bb.0: ; RV32XTHEADBB-NEXT: neg a4, a2 -; RV32XTHEADBB-NEXT: addi a6, a2, -32 ; RV32XTHEADBB-NEXT: sll a5, a0, a2 +; RV32XTHEADBB-NEXT: addi a6, a2, -32 ; RV32XTHEADBB-NEXT: bltz a6, .LBB24_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: mv a3, a5 @@ -2464,8 +2464,8 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind { ; RV32I-LABEL: rotr_64_zext: ; RV32I: # %bb.0: ; RV32I-NEXT: neg a4, a2 -; RV32I-NEXT: addi a6, a2, -32 ; RV32I-NEXT: srl a5, a1, a2 +; RV32I-NEXT: addi a6, a2, -32 ; RV32I-NEXT: bltz a6, .LBB25_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a3, a5 @@ -2512,8 +2512,8 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind { ; RV32ZBB-LABEL: rotr_64_zext: ; RV32ZBB: # %bb.0: ; RV32ZBB-NEXT: neg a4, a2 -; RV32ZBB-NEXT: addi a6, a2, -32 ; RV32ZBB-NEXT: srl a5, a1, a2 +; RV32ZBB-NEXT: addi a6, a2, -32 ; RV32ZBB-NEXT: bltz a6, .LBB25_2 ; RV32ZBB-NEXT: # %bb.1: ; RV32ZBB-NEXT: mv a3, a5 @@ -2557,8 +2557,8 @@ define i64 @rotr_64_zext(i64 %x, i32 %y) nounwind { ; RV32XTHEADBB-LABEL: rotr_64_zext: ; RV32XTHEADBB: # %bb.0: ; RV32XTHEADBB-NEXT: neg a4, a2 -; RV32XTHEADBB-NEXT: addi a6, a2, -32 ; RV32XTHEADBB-NEXT: srl a5, a1, a2 +; RV32XTHEADBB-NEXT: addi a6, a2, -32 ; RV32XTHEADBB-NEXT: bltz a6, .LBB25_2 ; RV32XTHEADBB-NEXT: # %bb.1: ; RV32XTHEADBB-NEXT: mv a3, a5 diff --git a/llvm/test/CodeGen/RISCV/rv32-inline-asm-pairs.ll b/llvm/test/CodeGen/RISCV/rv32-inline-asm-pairs.ll index f14fe2665835e..3f1b2fab8bb10 100644 --- a/llvm/test/CodeGen/RISCV/rv32-inline-asm-pairs.ll +++ b/llvm/test/CodeGen/RISCV/rv32-inline-asm-pairs.ll @@ -42,8 +42,8 @@ define i64 @test_Pr_wide_scalar_inout(ptr %0, i64 noundef %1) nounwind { ; 
CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: sw a0, 12(sp) ; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: sw a0, 12(sp) ; CHECK-NEXT: sw a1, 0(sp) ; CHECK-NEXT: sw a3, 4(sp) ; CHECK-NEXT: #APP @@ -112,8 +112,8 @@ define i64 @test_cR_wide_scalar_inout(ptr %0, i64 noundef %1) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: sw a0, 12(sp) ; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: sw a0, 12(sp) ; CHECK-NEXT: sw a1, 0(sp) ; CHECK-NEXT: sw a3, 4(sp) ; CHECK-NEXT: #APP diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index 90a8eadb3f974..15cea807a26de 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -762,16 +762,16 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %a) nounwind { ; ; RV32ZBB-LABEL: ctpop_v2i64: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a2, 4(a1) -; RV32ZBB-NEXT: lw a3, 0(a1) -; RV32ZBB-NEXT: lw a4, 12(a1) -; RV32ZBB-NEXT: lw a1, 8(a1) -; RV32ZBB-NEXT: cpop a2, a2 +; RV32ZBB-NEXT: lw a2, 0(a1) +; RV32ZBB-NEXT: lw a3, 4(a1) +; RV32ZBB-NEXT: lw a4, 8(a1) +; RV32ZBB-NEXT: lw a1, 12(a1) ; RV32ZBB-NEXT: cpop a3, a3 -; RV32ZBB-NEXT: cpop a4, a4 +; RV32ZBB-NEXT: cpop a2, a2 ; RV32ZBB-NEXT: cpop a1, a1 -; RV32ZBB-NEXT: add a2, a3, a2 -; RV32ZBB-NEXT: add a1, a1, a4 +; RV32ZBB-NEXT: cpop a4, a4 +; RV32ZBB-NEXT: add a2, a2, a3 +; RV32ZBB-NEXT: add a1, a4, a1 ; RV32ZBB-NEXT: sw a2, 0(a0) ; RV32ZBB-NEXT: sw zero, 4(a0) ; RV32ZBB-NEXT: sw a1, 8(a0) @@ -806,18 +806,18 @@ define <2 x i1> @ctpop_v2i64_ult_two(<2 x i64> %a) nounwind { ; ; RV32ZBB-LABEL: ctpop_v2i64_ult_two: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a1, 12(a0) -; RV32ZBB-NEXT: lw a2, 8(a0) -; RV32ZBB-NEXT: lw a3, 4(a0) -; RV32ZBB-NEXT: lw a0, 0(a0) -; RV32ZBB-NEXT: cpop a1, a1 -; RV32ZBB-NEXT: cpop a2, a2 -; RV32ZBB-NEXT: cpop a3, a3 +; RV32ZBB-NEXT: lw a1, 0(a0) +; RV32ZBB-NEXT: lw a2, 4(a0) +; RV32ZBB-NEXT: lw a3, 8(a0) +; RV32ZBB-NEXT: lw a0, 12(a0) ; RV32ZBB-NEXT: cpop a0, a0 -; RV32ZBB-NEXT: add a1, a2, a1 -; RV32ZBB-NEXT: add a0, a0, a3 -; RV32ZBB-NEXT: sltiu a0, a0, 2 -; RV32ZBB-NEXT: sltiu a1, a1, 2 +; RV32ZBB-NEXT: cpop a3, a3 +; RV32ZBB-NEXT: cpop a2, a2 +; RV32ZBB-NEXT: cpop a1, a1 +; RV32ZBB-NEXT: add a3, a3, a0 +; RV32ZBB-NEXT: add a1, a1, a2 +; RV32ZBB-NEXT: sltiu a0, a1, 2 +; RV32ZBB-NEXT: sltiu a1, a3, 2 ; RV32ZBB-NEXT: ret %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) %2 = icmp ult <2 x i64> %1, @@ -849,20 +849,20 @@ define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind { ; ; RV32ZBB-LABEL: ctpop_v2i64_ugt_one: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a1, 12(a0) -; RV32ZBB-NEXT: lw a2, 8(a0) -; RV32ZBB-NEXT: lw a3, 4(a0) -; RV32ZBB-NEXT: lw a0, 0(a0) -; RV32ZBB-NEXT: cpop a1, a1 -; RV32ZBB-NEXT: cpop a2, a2 -; RV32ZBB-NEXT: cpop a3, a3 +; RV32ZBB-NEXT: lw a1, 0(a0) +; RV32ZBB-NEXT: lw a2, 4(a0) +; RV32ZBB-NEXT: lw a3, 8(a0) +; RV32ZBB-NEXT: lw a0, 12(a0) ; RV32ZBB-NEXT: cpop a0, a0 -; RV32ZBB-NEXT: add a1, a2, a1 -; RV32ZBB-NEXT: add a0, a0, a3 -; RV32ZBB-NEXT: sltiu a0, a0, 2 +; RV32ZBB-NEXT: cpop a3, a3 +; RV32ZBB-NEXT: cpop a2, a2 +; RV32ZBB-NEXT: cpop a1, a1 +; RV32ZBB-NEXT: add a0, a3, a0 +; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: sltiu a1, a1, 2 -; RV32ZBB-NEXT: xori a0, a0, 1 -; RV32ZBB-NEXT: xori a1, a1, 1 +; RV32ZBB-NEXT: sltiu a2, a0, 2 +; RV32ZBB-NEXT: xori a0, a1, 1 +; RV32ZBB-NEXT: xori a1, a2, 1 ; RV32ZBB-NEXT: ret %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) %2 = icmp ugt <2 x i64> %1, @@ -904,20 +904,20 @@ define 
<2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind { ; ; RV32ZBB-LABEL: ctpop_v2i64_eq_one: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a1, 12(a0) -; RV32ZBB-NEXT: lw a2, 8(a0) -; RV32ZBB-NEXT: lw a3, 4(a0) -; RV32ZBB-NEXT: lw a0, 0(a0) -; RV32ZBB-NEXT: cpop a1, a1 -; RV32ZBB-NEXT: cpop a2, a2 -; RV32ZBB-NEXT: cpop a3, a3 +; RV32ZBB-NEXT: lw a1, 0(a0) +; RV32ZBB-NEXT: lw a2, 4(a0) +; RV32ZBB-NEXT: lw a3, 8(a0) +; RV32ZBB-NEXT: lw a0, 12(a0) ; RV32ZBB-NEXT: cpop a0, a0 -; RV32ZBB-NEXT: add a1, a2, a1 -; RV32ZBB-NEXT: add a0, a0, a3 -; RV32ZBB-NEXT: addi a0, a0, -1 +; RV32ZBB-NEXT: cpop a3, a3 +; RV32ZBB-NEXT: cpop a2, a2 +; RV32ZBB-NEXT: cpop a1, a1 +; RV32ZBB-NEXT: add a0, a3, a0 +; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: addi a1, a1, -1 -; RV32ZBB-NEXT: seqz a0, a0 -; RV32ZBB-NEXT: seqz a1, a1 +; RV32ZBB-NEXT: addi a2, a0, -1 +; RV32ZBB-NEXT: seqz a0, a1 +; RV32ZBB-NEXT: seqz a1, a2 ; RV32ZBB-NEXT: ret %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) %2 = icmp eq <2 x i64> %1, @@ -961,20 +961,20 @@ define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind { ; ; RV32ZBB-LABEL: ctpop_v2i64_ne_one: ; RV32ZBB: # %bb.0: -; RV32ZBB-NEXT: lw a1, 12(a0) -; RV32ZBB-NEXT: lw a2, 8(a0) -; RV32ZBB-NEXT: lw a3, 4(a0) -; RV32ZBB-NEXT: lw a0, 0(a0) -; RV32ZBB-NEXT: cpop a1, a1 -; RV32ZBB-NEXT: cpop a2, a2 -; RV32ZBB-NEXT: cpop a3, a3 +; RV32ZBB-NEXT: lw a1, 0(a0) +; RV32ZBB-NEXT: lw a2, 4(a0) +; RV32ZBB-NEXT: lw a3, 8(a0) +; RV32ZBB-NEXT: lw a0, 12(a0) ; RV32ZBB-NEXT: cpop a0, a0 -; RV32ZBB-NEXT: add a1, a2, a1 -; RV32ZBB-NEXT: add a0, a0, a3 -; RV32ZBB-NEXT: addi a0, a0, -1 +; RV32ZBB-NEXT: cpop a3, a3 +; RV32ZBB-NEXT: cpop a2, a2 +; RV32ZBB-NEXT: cpop a1, a1 +; RV32ZBB-NEXT: add a0, a3, a0 +; RV32ZBB-NEXT: add a1, a1, a2 ; RV32ZBB-NEXT: addi a1, a1, -1 -; RV32ZBB-NEXT: snez a0, a0 -; RV32ZBB-NEXT: snez a1, a1 +; RV32ZBB-NEXT: addi a2, a0, -1 +; RV32ZBB-NEXT: snez a0, a1 +; RV32ZBB-NEXT: snez a1, a2 ; RV32ZBB-NEXT: ret %1 = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) %2 = icmp ne <2 x i64> %1, diff --git a/llvm/test/CodeGen/RISCV/rv32zbs.ll b/llvm/test/CodeGen/RISCV/rv32zbs.ll index 1a3beeb79b85b..17ea0a32cf475 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbs.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbs.ll @@ -787,8 +787,8 @@ define i64 @bset_trailing_ones_i64_mask(i64 %a) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: li a2, -1 ; CHECK-NEXT: andi a3, a0, 63 -; CHECK-NEXT: addi a1, a3, -32 ; CHECK-NEXT: sll a0, a2, a0 +; CHECK-NEXT: addi a1, a3, -32 ; CHECK-NEXT: bltz a1, .LBB43_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: sll a2, a2, a3 @@ -815,8 +815,8 @@ define i64 @bset_trailing_ones_i64_no_mask(i64 %a) nounwind { ; CHECK-LABEL: bset_trailing_ones_i64_no_mask: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, -1 -; CHECK-NEXT: addi a2, a0, -32 ; CHECK-NEXT: sll a1, a1, a0 +; CHECK-NEXT: addi a2, a0, -32 ; CHECK-NEXT: bltz a2, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rv64-double-convert.ll b/llvm/test/CodeGen/RISCV/rv64-double-convert.ll index dd49d9e3e2dce..8865f244cee1e 100644 --- a/llvm/test/CodeGen/RISCV/rv64-double-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64-double-convert.ll @@ -122,9 +122,9 @@ define i128 @fptosi_sat_f64_to_i128(double %a) nounwind { ; RV64ID-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64ID-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64ID-NEXT: fsd fs0, 8(sp) # 8-byte Folded Spill +; RV64ID-NEXT: fmv.d fs0, fa0 ; RV64ID-NEXT: lui a0, %hi(.LCPI4_0) ; RV64ID-NEXT: fld fa5, %lo(.LCPI4_0)(a0) -; RV64ID-NEXT: fmv.d fs0, fa0 ; RV64ID-NEXT: 
fle.d s0, fa5, fa0 ; RV64ID-NEXT: call __fixdfti ; RV64ID-NEXT: li a2, -1 diff --git a/llvm/test/CodeGen/RISCV/rv64-half-convert.ll b/llvm/test/CodeGen/RISCV/rv64-half-convert.ll index ea582ac258b71..a243d9ed68a33 100644 --- a/llvm/test/CodeGen/RISCV/rv64-half-convert.ll +++ b/llvm/test/CodeGen/RISCV/rv64-half-convert.ll @@ -309,14 +309,14 @@ define i128 @fptoui_sat_f16_to_i128(half %a) nounwind { ; RV64IZFH-NEXT: sd ra, 24(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: sd s0, 16(sp) # 8-byte Folded Spill ; RV64IZFH-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 ; RV64IZFH-NEXT: lui a0, %hi(.LCPI5_0) +; RV64IZFH-NEXT: fmv.w.x fa5, zero +; RV64IZFH-NEXT: fle.s a1, fa5, fa0 ; RV64IZFH-NEXT: flw fa5, %lo(.LCPI5_0)(a0) -; RV64IZFH-NEXT: fcvt.s.h fa0, fa0 -; RV64IZFH-NEXT: fmv.w.x fa4, zero -; RV64IZFH-NEXT: fle.s a0, fa4, fa0 -; RV64IZFH-NEXT: flt.s a1, fa5, fa0 -; RV64IZFH-NEXT: neg s0, a1 -; RV64IZFH-NEXT: neg s1, a0 +; RV64IZFH-NEXT: flt.s a0, fa5, fa0 +; RV64IZFH-NEXT: neg s0, a0 +; RV64IZFH-NEXT: neg s1, a1 ; RV64IZFH-NEXT: call __fixunssfti ; RV64IZFH-NEXT: and a0, s1, a0 ; RV64IZFH-NEXT: and a1, s1, a1 diff --git a/llvm/test/CodeGen/RISCV/rv64-inline-asm-pairs.ll b/llvm/test/CodeGen/RISCV/rv64-inline-asm-pairs.ll index ac455b7fac882..c1b8d0865dca8 100644 --- a/llvm/test/CodeGen/RISCV/rv64-inline-asm-pairs.ll +++ b/llvm/test/CodeGen/RISCV/rv64-inline-asm-pairs.ll @@ -42,8 +42,8 @@ define i128 @test_R_wide_scalar_inout(ptr %0, i128 noundef %1) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -32 ; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: sd a0, 24(sp) ; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: sd a0, 24(sp) ; CHECK-NEXT: sd a1, 0(sp) ; CHECK-NEXT: sd a3, 8(sp) ; CHECK-NEXT: #APP @@ -112,8 +112,8 @@ define i128 @test_cR_wide_scalar_inout(ptr %0, i128 noundef %1) nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -32 ; CHECK-NEXT: mv a3, a2 -; CHECK-NEXT: sd a0, 24(sp) ; CHECK-NEXT: mv a2, a1 +; CHECK-NEXT: sd a0, 24(sp) ; CHECK-NEXT: sd a1, 0(sp) ; CHECK-NEXT: sd a3, 8(sp) ; CHECK-NEXT: #APP diff --git a/llvm/test/CodeGen/RISCV/rv64-trampoline.ll b/llvm/test/CodeGen/RISCV/rv64-trampoline.ll index 1ec4d8ddd1d84..8379036b2d74d 100644 --- a/llvm/test/CodeGen/RISCV/rv64-trampoline.ll +++ b/llvm/test/CodeGen/RISCV/rv64-trampoline.ll @@ -29,8 +29,8 @@ define i64 @test0(i64 %n, ptr %p) nounwind { ; RV64-NEXT: sd a3, 16(sp) ; RV64-NEXT: sd a1, 24(sp) ; RV64-NEXT: addi a1, sp, 24 -; RV64-NEXT: addi a0, sp, 8 ; RV64-NEXT: addi s1, sp, 8 +; RV64-NEXT: addi a0, sp, 8 ; RV64-NEXT: call __clear_cache ; RV64-NEXT: mv a0, s0 ; RV64-NEXT: jalr s1 @@ -60,8 +60,8 @@ define i64 @test0(i64 %n, ptr %p) nounwind { ; RV64-LINUX-NEXT: sd a3, 16(sp) ; RV64-LINUX-NEXT: sd a1, 24(sp) ; RV64-LINUX-NEXT: addi a1, sp, 24 -; RV64-LINUX-NEXT: addi a0, sp, 8 ; RV64-LINUX-NEXT: addi s1, sp, 8 +; RV64-LINUX-NEXT: addi a0, sp, 8 ; RV64-LINUX-NEXT: li a2, 0 ; RV64-LINUX-NEXT: call __riscv_flush_icache ; RV64-LINUX-NEXT: mv a0, s0 diff --git a/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll b/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll index b8c43289bdfed..dd16e2beacec2 100644 --- a/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-demanded-bits.ll @@ -169,9 +169,9 @@ define signext i32 @andi_srliw(i32 signext %0, ptr %1, i32 signext %2) { ; CHECK-LABEL: andi_srliw: ; CHECK: # %bb.0: ; CHECK-NEXT: andi a3, a0, -8 -; CHECK-NEXT: srliw a4, a0, 3 +; CHECK-NEXT: srliw a0, a0, 3 +; CHECK-NEXT: sw a0, 0(a1) ; CHECK-NEXT: addw a0, a3, a2 -; CHECK-NEXT: 
sw a4, 0(a1) ; CHECK-NEXT: ret %4 = and i32 %0, -8 %5 = lshr i32 %0, 3 diff --git a/llvm/test/CodeGen/RISCV/rv64zbkb.ll b/llvm/test/CodeGen/RISCV/rv64zbkb.ll index 985837d05caa2..b87d3504ce9ff 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbkb.ll @@ -106,8 +106,8 @@ define i64 @pack_i64_3(ptr %0, ptr %1) { ; RV64I-LABEL: pack_i64_3: ; RV64I: # %bb.0: ; RV64I-NEXT: lw a0, 0(a0) -; RV64I-NEXT: lwu a1, 0(a1) ; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: lwu a1, 0(a1) ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll index 4ade6c09fe43d..fa6ae2f8b171e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/active_lane_mask.ll @@ -103,13 +103,13 @@ define <8 x i1> @fv8(ptr %p, i64 %index, i64 %tc) { define <32 x i1> @fv32(ptr %p, i64 %index, i64 %tc) { ; CHECK-LABEL: fv32: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vid.v v8 ; CHECK-NEXT: lui a0, %hi(.LCPI8_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI8_0) -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vsaddu.vx v8, v8, a1 +; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vid.v v16 -; CHECK-NEXT: vsaddu.vx v16, v16, a1 -; CHECK-NEXT: vmsltu.vx v0, v16, a2 ; CHECK-NEXT: vsext.vf8 v16, v8 ; CHECK-NEXT: vsaddu.vx v8, v16, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 @@ -130,11 +130,8 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: lui a0, %hi(.LCPI9_1) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_1) -; CHECK-NEXT: vle8.v v17, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI9_2) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2) ; CHECK-NEXT: vsaddu.vx v8, v8, a1 -; CHECK-NEXT: vle8.v v18, (a0) +; CHECK-NEXT: vle8.v v17, (a0) ; CHECK-NEXT: vmsltu.vx v0, v8, a2 ; CHECK-NEXT: vsext.vf8 v8, v16 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 @@ -142,13 +139,16 @@ define <64 x i1> @fv64(ptr %p, i64 %index, i64 %tc) { ; CHECK-NEXT: vsext.vf8 v8, v17 ; CHECK-NEXT: vsaddu.vx v8, v8, a1 ; CHECK-NEXT: vmsltu.vx v17, v8, a2 +; CHECK-NEXT: lui a0, %hi(.LCPI9_2) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI9_2) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v0, v16, 2 ; CHECK-NEXT: vsetivli zero, 6, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v0, v17, 4 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vsext.vf8 v8, v18 -; CHECK-NEXT: vsaddu.vx v8, v8, a1 +; CHECK-NEXT: vsext.vf8 v16, v8 +; CHECK-NEXT: vsaddu.vx v8, v16, a1 ; CHECK-NEXT: vmsltu.vx v16, v8, a2 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v0, v16, 6 diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll index 9ac2775d30668..3f4a7fca33293 100644 --- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll +++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-array.ll @@ -17,17 +17,17 @@ define void @test(ptr %addr) { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x03, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 3 * vlenb ; CHECK-NEXT: csrrs a1, vlenb, zero ; CHECK-NEXT: vl1re64.v v8, (a0) -; CHECK-NEXT: slli a2, a1, 1 -; CHECK-NEXT: add a3, a0, a2 +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: add a3, a0, a1 ; CHECK-NEXT: vl1re64.v v9, (a3) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: add a0, a0, a1 -; 
CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: add a0, a0, a3 ; CHECK-NEXT: vl1re64.v v10, (a0) -; CHECK-NEXT: add a2, a3, a2 -; CHECK-NEXT: vs1r.v v8, (a3) -; CHECK-NEXT: vs1r.v v9, (a2) -; CHECK-NEXT: vs1r.v v10, (a1) +; CHECK-NEXT: add a3, a2, a3 +; CHECK-NEXT: vs1r.v v8, (a2) +; CHECK-NEXT: vs1r.v v10, (a3) +; CHECK-NEXT: vs1r.v v9, (a1) ; CHECK-NEXT: csrrs a0, vlenb, zero ; CHECK-NEXT: slli a1, a0, 1 ; CHECK-NEXT: add a0, a1, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll index fb25d4e15e40e..5fecb75d847a0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll +++ b/llvm/test/CodeGen/RISCV/rvv/alloca-load-store-scalable-struct.ll @@ -17,14 +17,14 @@ define @test(ptr %addr, i64 %vl) { ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 2 * vlenb ; CHECK-NEXT: csrrs a2, vlenb, zero ; CHECK-NEXT: vl1re64.v v8, (a0) +; CHECK-NEXT: addi a3, sp, 16 ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: vl1re64.v v9, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: add a2, a0, a2 -; CHECK-NEXT: vs1r.v v8, (a0) +; CHECK-NEXT: add a2, a3, a2 +; CHECK-NEXT: vs1r.v v8, (a3) ; CHECK-NEXT: vs1r.v v9, (a2) ; CHECK-NEXT: vl1re64.v v8, (a2) -; CHECK-NEXT: vl1re64.v v9, (a0) +; CHECK-NEXT: vl1re64.v v9, (a3) ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma ; CHECK-NEXT: vfadd.vv v8, v9, v8 ; CHECK-NEXT: csrrs a0, vlenb, zero diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll index 1ed84316d4484..d7c608fffd7a3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-sdnode.ll @@ -713,59 +713,59 @@ define @bitreverse_nxv1i64( %va) { ; RV32-NEXT: vsetvli a4, zero, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 24 ; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vsrl.vi v10, v8, 8 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: vsrl.vx v11, v8, a2 +; RV32-NEXT: vsrl.vx v11, v8, a1 +; RV32-NEXT: vsrl.vx v12, v8, a2 ; RV32-NEXT: addi a0, a3, -256 -; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v11, v11, a0 -; RV32-NEXT: vlse64.v v13, (a5), zero -; RV32-NEXT: vor.vv v10, v11, v10 -; RV32-NEXT: vand.vx v11, v8, a0 -; RV32-NEXT: vsll.vx v11, v11, a2 -; RV32-NEXT: vor.vv v11, v12, v11 -; RV32-NEXT: vsrl.vi v12, v8, 8 ; RV32-NEXT: vand.vx v9, v9, a4 -; RV32-NEXT: vand.vv v12, v12, v13 -; RV32-NEXT: vor.vv v9, v12, v9 +; RV32-NEXT: vsll.vx v13, v8, a1 +; RV32-NEXT: vand.vx v12, v12, a0 +; RV32-NEXT: vor.vv v11, v12, v11 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vsll.vx v12, v12, a2 +; RV32-NEXT: vor.vv v12, v13, v12 +; RV32-NEXT: vlse64.v v13, (a5), zero +; RV32-NEXT: vand.vv v10, v10, v13 +; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: vand.vv v12, v8, v13 +; RV32-NEXT: vand.vv v10, v8, v13 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: addi a2, a2, 1365 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v9, v9, v10 +; RV32-NEXT: vor.vv v9, v9, v11 ; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v10, a0 +; RV32-NEXT: vmv.v.x v11, a0 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vsll.vi v12, v12, 8 -; 
RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma -; RV32-NEXT: vor.vv v8, v11, v8 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsetvli a0, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v11, a2 +; RV32-NEXT: vmv.v.x v12, a2 ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vv v9, v9, v11 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vand.vv v9, v9, v12 +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: vand.vv v9, v9, v11 +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vand.vv v9, v9, v12 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: addi sp, sp, 16 @@ -852,42 +852,42 @@ define @bitreverse_nxv2i64( %va) { ; RV32-NEXT: li a2, 40 ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: vsetvli a4, zero, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 24 +; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vsrl.vi v14, v8, 8 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: vsrl.vx v12, v8, a2 +; RV32-NEXT: vsrl.vx v12, v8, a1 +; RV32-NEXT: vsrl.vx v16, v8, a2 ; RV32-NEXT: addi a0, a3, -256 -; RV32-NEXT: vsll.vx v18, v8, a1 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vlse64.v v14, (a5), zero -; RV32-NEXT: vor.vv v12, v12, v10 -; RV32-NEXT: vand.vx v10, v8, a0 -; RV32-NEXT: vsll.vx v10, v10, a2 -; RV32-NEXT: vor.vv v10, v18, v10 -; RV32-NEXT: vsrl.vi v18, v8, 8 -; RV32-NEXT: vand.vx v16, v16, a4 -; RV32-NEXT: vand.vv v18, v18, v14 -; RV32-NEXT: vor.vv v16, v18, v16 +; RV32-NEXT: vand.vx v18, v10, a4 +; RV32-NEXT: vsll.vx v10, v8, a1 +; RV32-NEXT: vand.vx v16, v16, a0 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vsll.vx v16, v16, a2 +; RV32-NEXT: vor.vv v10, v10, v16 +; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vand.vv v14, v14, v16 +; RV32-NEXT: vor.vv v14, v14, v18 ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: vand.vv v14, v8, v14 +; RV32-NEXT: vand.vv v16, v8, v16 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: addi a2, a2, 1365 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vor.vv v12, v14, v12 ; RV32-NEXT: vsetvli a3, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v16, a0 +; RV32-NEXT: vmv.v.x v14, a0 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma -; RV32-NEXT: vsll.vi v14, v14, 8 -; RV32-NEXT: vor.vv v8, v8, v14 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma -; RV32-NEXT: vmv.v.x v14, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; RV32-NEXT: vor.vv v8, v10, v8 ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma @@ -895,13 +895,13 @@ define @bitreverse_nxv2i64( %va) { ; RV32-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsrl.vi v12, v8, 4 
-; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vand.vv v12, v12, v16 +; RV32-NEXT: vand.vv v8, v8, v14 +; RV32-NEXT: vand.vv v12, v12, v14 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v14 -; RV32-NEXT: vand.vv v12, v12, v14 +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v12, v12, v16 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsrl.vi v12, v8, 1 @@ -993,42 +993,42 @@ define @bitreverse_nxv4i64( %va) { ; RV32-NEXT: li a2, 40 ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: vsetvli a4, zero, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: vsrl.vi v12, v8, 24 ; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a2 ; RV32-NEXT: addi a0, a3, -256 -; RV32-NEXT: vsll.vx v28, v8, a1 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vlse64.v v20, (a5), zero -; RV32-NEXT: vor.vv v16, v16, v12 -; RV32-NEXT: vand.vx v12, v8, a0 -; RV32-NEXT: vsll.vx v12, v12, a2 -; RV32-NEXT: vor.vv v12, v28, v12 -; RV32-NEXT: vsrl.vi v28, v8, 8 -; RV32-NEXT: vand.vx v24, v24, a4 -; RV32-NEXT: vand.vv v28, v28, v20 -; RV32-NEXT: vor.vv v24, v28, v24 +; RV32-NEXT: vand.vx v28, v12, a4 +; RV32-NEXT: vsll.vx v12, v8, a1 +; RV32-NEXT: vand.vx v24, v24, a0 +; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vand.vx v24, v8, a0 +; RV32-NEXT: vsll.vx v24, v24, a2 +; RV32-NEXT: vor.vv v12, v12, v24 +; RV32-NEXT: vlse64.v v24, (a5), zero +; RV32-NEXT: vand.vv v20, v20, v24 +; RV32-NEXT: vor.vv v20, v20, v28 ; RV32-NEXT: lui a0, 61681 ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: lui a2, 349525 -; RV32-NEXT: vand.vv v20, v8, v20 +; RV32-NEXT: vand.vv v24, v8, v24 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: addi a1, a1, 819 ; RV32-NEXT: addi a2, a2, 1365 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vor.vv v16, v20, v16 ; RV32-NEXT: vsetvli a3, zero, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v24, a0 +; RV32-NEXT: vmv.v.x v20, a0 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma -; RV32-NEXT: vsll.vi v20, v20, 8 -; RV32-NEXT: vor.vv v8, v8, v20 +; RV32-NEXT: vsll.vi v24, v24, 8 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma -; RV32-NEXT: vmv.v.x v20, a1 +; RV32-NEXT: vmv.v.x v24, a1 ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vsetvli a0, zero, e32, m4, ta, ma @@ -1036,13 +1036,13 @@ define @bitreverse_nxv4i64( %va) { ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vand.vv v8, v8, v24 -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vand.vv v8, v8, v20 +; RV32-NEXT: vand.vv v16, v16, v20 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v20 -; RV32-NEXT: vand.vv v16, v16, v20 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vsrl.vi v16, v8, 1 @@ -1137,38 +1137,38 @@ define @bitreverse_nxv8i64( %va) { ; RV32-NEXT: li a1, 56 ; RV32-NEXT: li a2, 40 ; RV32-NEXT: lui a3, 16 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsetvli a0, zero, 
e64, m8, ta, ma +; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: addi a0, a3, -256 +; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vsll.vx v0, v8, a1 -; RV32-NEXT: vand.vx v24, v24, a0 +; RV32-NEXT: vand.vx v24, v24, a3 ; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vand.vx v16, v8, a3 ; RV32-NEXT: vsll.vx v16, v16, a2 ; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v0, (a5), zero -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: lui a1, 4080 +; RV32-NEXT: addi a2, sp, 8 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: vand.vx v0, v0, a1 ; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vor.vv v24, v24, v0 ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24 -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vx v8, v8, a1 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vsll.vi v16, v16, 8 ; RV32-NEXT: vor.vv v8, v8, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll index 4d34621cd5f24..e2c8bc8b29171 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll @@ -1585,58 +1585,58 @@ define @vp_bitreverse_nxv1i64_unmasked( %va ; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 24 +; RV32-NEXT: vsrl.vi v10, v8, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsll.vx v10, v8, a2 +; RV32-NEXT: vsll.vx v11, v8, a2 ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vsrl.vx v11, v8, a2 -; RV32-NEXT: vsrl.vx v12, v8, a4 +; RV32-NEXT: vsrl.vx v12, v8, a2 +; RV32-NEXT: vsrl.vx v13, v8, a4 +; RV32-NEXT: vand.vx v9, v9, a5 +; RV32-NEXT: vand.vx v13, v13, a1 +; RV32-NEXT: vor.vv v12, v13, v12 ; RV32-NEXT: vand.vx v13, v8, a1 -; RV32-NEXT: vand.vx v12, v12, a1 -; RV32-NEXT: vor.vv v11, v12, v11 -; RV32-NEXT: vlse64.v v12, (a6), zero ; RV32-NEXT: vsll.vx v13, v13, a4 -; RV32-NEXT: vor.vv v10, v10, v13 -; RV32-NEXT: vsrl.vi v13, v8, 8 -; RV32-NEXT: vand.vx v9, v9, a5 -; RV32-NEXT: vand.vv v13, v13, v12 -; RV32-NEXT: vor.vv v9, v13, v9 +; RV32-NEXT: vor.vv v11, v11, v13 +; RV32-NEXT: vlse64.v v13, (a6), zero +; RV32-NEXT: vand.vv v10, v10, v13 +; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: lui a3, 349525 -; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: vand.vv v10, v8, v13 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: addi a2, a2, 819 ; RV32-NEXT: addi a3, a3, 1365 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv 
v8, v8, v12 +; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsetvli a4, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vor.vv v9, v9, v12 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v11, a2 +; RV32-NEXT: vmv.v.x v12, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vor.vv v8, v11, v8 ; RV32-NEXT: vsetvli a1, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v10, a3 +; RV32-NEXT: vmv.v.x v11, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vand.vv v9, v9, v12 +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: vand.vv v9, v9, v11 +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vand.vv v9, v9, v12 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vand.vv v9, v9, v10 +; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vv v9, v9, v11 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: addi sp, sp, 16 @@ -1866,23 +1866,23 @@ define @vp_bitreverse_nxv2i64_unmasked( %va ; RV32-NEXT: lui a5, 4080 ; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v14, v8, 24 +; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: vsrl.vi v14, v8, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsll.vx v12, v8, a2 ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vsrl.vx v10, v8, a2 -; RV32-NEXT: vsrl.vx v16, v8, a4 -; RV32-NEXT: vand.vx v18, v8, a1 -; RV32-NEXT: vand.vx v16, v16, a1 -; RV32-NEXT: vor.vv v10, v16, v10 +; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: vsrl.vx v18, v8, a4 +; RV32-NEXT: vand.vx v20, v10, a5 +; RV32-NEXT: vand.vx v10, v18, a1 +; RV32-NEXT: vor.vv v10, v10, v16 +; RV32-NEXT: vand.vx v16, v8, a1 +; RV32-NEXT: vsll.vx v16, v16, a4 +; RV32-NEXT: vor.vv v12, v12, v16 ; RV32-NEXT: vlse64.v v16, (a6), zero -; RV32-NEXT: vsll.vx v18, v18, a4 -; RV32-NEXT: vor.vv v12, v12, v18 -; RV32-NEXT: vsrl.vi v18, v8, 8 -; RV32-NEXT: vand.vx v14, v14, a5 -; RV32-NEXT: vand.vv v18, v18, v16 -; RV32-NEXT: vor.vv v14, v18, v14 +; RV32-NEXT: vand.vv v14, v14, v16 +; RV32-NEXT: vor.vv v14, v14, v20 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: lui a3, 349525 @@ -2148,23 +2148,23 @@ define @vp_bitreverse_nxv4i64_unmasked( %va ; RV32-NEXT: lui a5, 4080 ; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v20, v8, 24 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsll.vx v16, v8, a2 ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: vsrl.vx v24, v8, a4 -; RV32-NEXT: vand.vx v28, v8, a1 -; RV32-NEXT: vand.vx v24, v24, a1 -; RV32-NEXT: vor.vv v12, v24, v12 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: vsrl.vx v28, v8, a4 +; RV32-NEXT: vand.vx v4, v12, a5 +; RV32-NEXT: vand.vx v12, v28, a1 +; RV32-NEXT: vor.vv v12, v12, v24 +; RV32-NEXT: vand.vx v24, v8, a1 +; RV32-NEXT: vsll.vx v24, v24, a4 +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: vlse64.v v24, (a6), zero -; RV32-NEXT: 
vsll.vx v28, v28, a4 -; RV32-NEXT: vor.vv v16, v16, v28 -; RV32-NEXT: vsrl.vi v28, v8, 8 -; RV32-NEXT: vand.vx v20, v20, a5 -; RV32-NEXT: vand.vv v28, v28, v24 -; RV32-NEXT: vor.vv v20, v28, v20 +; RV32-NEXT: vand.vv v20, v20, v24 +; RV32-NEXT: vor.vv v20, v20, v4 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: lui a3, 349525 @@ -2288,66 +2288,68 @@ define @vp_bitreverse_nxv7i64( %va, @vp_bitreverse_nxv7i64_unmasked( %va ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: li a4, 40 ; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: addi a6, sp, 8 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a2 -; RV32-NEXT: addi a1, a3, -256 ; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: addi a2, sp, 8 +; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vsrl.vx v0, v8, a4 -; RV32-NEXT: vand.vx v0, v0, a1 +; RV32-NEXT: vand.vx v0, v0, a3 ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v24, v8, a1 +; RV32-NEXT: addi a6, sp, 16 +; RV32-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v24, v8, a3 ; RV32-NEXT: vsll.vx v24, v24, a4 ; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v24, (a6), zero -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a5 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: vand.vx v0, v24, a5 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vor.vv v0, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v24, v8, v24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v24, v8, v16 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v0, v8 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: lui a3, 349525 @@ -2673,66 +2675,68 @@ define @vp_bitreverse_nxv8i64( %va, @vp_bitreverse_nxv8i64_unmasked( %va ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: li a4, 40 ; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: addi a6, sp, 8 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v16, v8, a2 -; RV32-NEXT: addi a1, a3, -256 ; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: addi a2, sp, 8 +; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vsrl.vx v0, v8, a4 -; RV32-NEXT: vand.vx v0, v0, a1 +; RV32-NEXT: vand.vx v0, v0, a3 ; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v24, v8, a1 +; RV32-NEXT: addi a6, sp, 16 +; RV32-NEXT: vs8r.v v24, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v24, v8, a3 ; RV32-NEXT: vsll.vx v24, v24, a4 ; RV32-NEXT: vor.vv v16, v16, v24 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; 
RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v24, (a6), zero -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a5 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v8, 24 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: vand.vx v0, v24, a5 +; RV32-NEXT: vsrl.vi v24, v8, 8 +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vor.vv v0, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v24, v8, v24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v24, v8, v16 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v0, v8 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: lui a3, 349525 diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll index 2cd763afa36b7..ee8bfe8910b78 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-sdnode.ll @@ -265,30 +265,30 @@ define @bswap_nxv1i64( %va) { ; RV32-NEXT: vsetvli a4, zero, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 24 ; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vsrl.vi v10, v8, 8 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsrl.vx v10, v8, a1 -; RV32-NEXT: vsrl.vx v11, v8, a2 +; RV32-NEXT: vsrl.vx v11, v8, a1 +; RV32-NEXT: vsrl.vx v12, v8, a2 ; RV32-NEXT: addi a0, a3, -256 -; RV32-NEXT: vsll.vx v12, v8, a1 -; RV32-NEXT: vand.vx v11, v11, a0 -; RV32-NEXT: vlse64.v v13, (a5), zero -; RV32-NEXT: vor.vv v10, v11, v10 -; RV32-NEXT: vand.vx v11, v8, a0 -; RV32-NEXT: vsll.vx v11, v11, a2 -; RV32-NEXT: vor.vv v11, v12, v11 -; RV32-NEXT: vsrl.vi v12, v8, 8 ; RV32-NEXT: vand.vx v9, v9, a4 -; RV32-NEXT: vand.vv v12, v12, v13 -; RV32-NEXT: vor.vv v9, v12, v9 -; RV32-NEXT: vand.vv v12, v8, v13 +; RV32-NEXT: vsll.vx v13, v8, a1 +; RV32-NEXT: vand.vx v12, v12, a0 +; RV32-NEXT: vor.vv v11, v12, v11 +; RV32-NEXT: vand.vx v12, v8, a0 +; RV32-NEXT: vsll.vx v12, v12, a2 +; RV32-NEXT: vor.vv v12, v13, v12 +; RV32-NEXT: vlse64.v v13, (a5), zero +; RV32-NEXT: vand.vv v10, v10, v13 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vand.vv v10, v8, v13 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v9, v9, v10 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v11, v8 +; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v12, v8 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -349,30 +349,30 @@ define @bswap_nxv2i64( %va) { ; RV32-NEXT: vsetvli a4, zero, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 24 ; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vsrl.vi v12, v8, 8 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsrl.vx v12, v8, a1 -; RV32-NEXT: vsrl.vx v14, v8, a2 +; RV32-NEXT: vsrl.vx v14, v8, a1 +; RV32-NEXT: vsrl.vx v16, v8, 
a2 ; RV32-NEXT: addi a0, a3, -256 -; RV32-NEXT: vsll.vx v16, v8, a1 -; RV32-NEXT: vand.vx v14, v14, a0 -; RV32-NEXT: vlse64.v v18, (a5), zero -; RV32-NEXT: vor.vv v12, v14, v12 -; RV32-NEXT: vand.vx v14, v8, a0 -; RV32-NEXT: vsll.vx v14, v14, a2 -; RV32-NEXT: vor.vv v14, v16, v14 -; RV32-NEXT: vsrl.vi v16, v8, 8 ; RV32-NEXT: vand.vx v10, v10, a4 -; RV32-NEXT: vand.vv v16, v16, v18 -; RV32-NEXT: vor.vv v10, v16, v10 -; RV32-NEXT: vand.vv v16, v8, v18 +; RV32-NEXT: vsll.vx v18, v8, a1 +; RV32-NEXT: vand.vx v16, v16, a0 +; RV32-NEXT: vor.vv v14, v16, v14 +; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vsll.vx v16, v16, a2 +; RV32-NEXT: vor.vv v16, v18, v16 +; RV32-NEXT: vlse64.v v18, (a5), zero +; RV32-NEXT: vand.vv v12, v12, v18 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vand.vv v12, v8, v18 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v10, v10, v12 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v14, v8 +; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vor.vv v10, v10, v14 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -431,32 +431,32 @@ define @bswap_nxv4i64( %va) { ; RV32-NEXT: li a2, 40 ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: vsetvli a4, zero, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsrl.vi v16, v8, 24 ; RV32-NEXT: lui a4, 4080 +; RV32-NEXT: vsrl.vi v12, v8, 8 ; RV32-NEXT: addi a5, sp, 8 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsrl.vx v16, v8, a1 -; RV32-NEXT: vsrl.vx v20, v8, a2 +; RV32-NEXT: vsrl.vx v20, v8, a1 +; RV32-NEXT: vsrl.vx v24, v8, a2 ; RV32-NEXT: addi a0, a3, -256 -; RV32-NEXT: vsll.vx v24, v8, a1 -; RV32-NEXT: vand.vx v20, v20, a0 -; RV32-NEXT: vlse64.v v28, (a5), zero -; RV32-NEXT: vor.vv v16, v20, v16 -; RV32-NEXT: vand.vx v20, v8, a0 -; RV32-NEXT: vsll.vx v20, v20, a2 +; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: vsll.vx v28, v8, a1 +; RV32-NEXT: vand.vx v24, v24, a0 ; RV32-NEXT: vor.vv v20, v24, v20 -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vx v12, v12, a4 -; RV32-NEXT: vand.vv v24, v24, v28 -; RV32-NEXT: vor.vv v12, v24, v12 -; RV32-NEXT: vand.vv v24, v8, v28 +; RV32-NEXT: vand.vx v24, v8, a0 +; RV32-NEXT: vsll.vx v24, v24, a2 +; RV32-NEXT: vor.vv v24, v28, v24 +; RV32-NEXT: vlse64.v v28, (a5), zero +; RV32-NEXT: vand.vv v12, v12, v28 +; RV32-NEXT: vor.vv v12, v12, v16 +; RV32-NEXT: vand.vv v16, v8, v28 ; RV32-NEXT: vand.vx v8, v8, a4 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v12, v12, v16 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v20, v8 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v12, v12, v20 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -518,38 +518,38 @@ define @bswap_nxv8i64( %va) { ; RV32-NEXT: li a1, 56 ; RV32-NEXT: li a2, 40 ; RV32-NEXT: lui a3, 16 -; RV32-NEXT: lui a4, 4080 -; RV32-NEXT: addi a5, sp, 8 -; RV32-NEXT: sw a0, 8(sp) -; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma ; RV32-NEXT: vsrl.vx v16, v8, a1 ; RV32-NEXT: vsrl.vx v24, v8, a2 -; RV32-NEXT: addi a0, a3, -256 +; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vsll.vx v0, v8, a1 -; RV32-NEXT: vand.vx v24, v24, a0 +; RV32-NEXT: vand.vx v24, v24, a3 ; RV32-NEXT: vor.vv v16, 
v24, v16 ; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v16, v8, a0 +; RV32-NEXT: vand.vx v16, v8, a3 ; RV32-NEXT: vsll.vx v16, v16, a2 ; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v0, (a5), zero -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a4 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: lui a1, 4080 +; RV32-NEXT: addi a2, sp, 8 +; RV32-NEXT: sw a0, 8(sp) +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: vand.vx v0, v0, a1 ; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vor.vv v16, v24, v16 +; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vor.vv v24, v24, v0 ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v24 -; RV32-NEXT: vand.vv v16, v8, v0 -; RV32-NEXT: vand.vx v8, v8, a4 +; RV32-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vx v8, v8, a1 ; RV32-NEXT: vsll.vi v8, v8, 24 ; RV32-NEXT: vsll.vi v16, v16, 8 ; RV32-NEXT: vor.vv v8, v8, v16 diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll index 0c58cca0f9472..8243e103a9271 100644 --- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll @@ -604,29 +604,29 @@ define @vp_bswap_nxv1i64_unmasked( %va, i32 ; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 24 +; RV32-NEXT: vsrl.vi v10, v8, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsll.vx v10, v8, a2 +; RV32-NEXT: vsll.vx v11, v8, a2 ; RV32-NEXT: addi a0, a3, -256 -; RV32-NEXT: vsrl.vx v11, v8, a2 -; RV32-NEXT: vsrl.vx v12, v8, a4 +; RV32-NEXT: vsrl.vx v12, v8, a2 +; RV32-NEXT: vsrl.vx v13, v8, a4 +; RV32-NEXT: vand.vx v9, v9, a5 +; RV32-NEXT: vand.vx v13, v13, a0 +; RV32-NEXT: vor.vv v12, v13, v12 ; RV32-NEXT: vand.vx v13, v8, a0 -; RV32-NEXT: vand.vx v12, v12, a0 -; RV32-NEXT: vor.vv v11, v12, v11 -; RV32-NEXT: vlse64.v v12, (a6), zero ; RV32-NEXT: vsll.vx v13, v13, a4 -; RV32-NEXT: vor.vv v10, v10, v13 -; RV32-NEXT: vsrl.vi v13, v8, 8 -; RV32-NEXT: vand.vx v9, v9, a5 -; RV32-NEXT: vand.vv v13, v13, v12 -; RV32-NEXT: vor.vv v9, v13, v9 -; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: vor.vv v11, v11, v13 +; RV32-NEXT: vlse64.v v13, (a6), zero +; RV32-NEXT: vand.vv v10, v10, v13 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vand.vv v10, v8, v13 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v11, v8 +; RV32-NEXT: vor.vv v9, v9, v12 ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -772,29 +772,29 @@ define @vp_bswap_nxv2i64_unmasked( %va, i32 ; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: vsrl.vi v12, v8, 8 ; 
RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsll.vx v12, v8, a2 +; RV32-NEXT: vsll.vx v14, v8, a2 ; RV32-NEXT: addi a0, a3, -256 -; RV32-NEXT: vsrl.vx v14, v8, a2 -; RV32-NEXT: vsrl.vx v16, v8, a4 +; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: vsrl.vx v18, v8, a4 +; RV32-NEXT: vand.vx v10, v10, a5 +; RV32-NEXT: vand.vx v18, v18, a0 +; RV32-NEXT: vor.vv v16, v18, v16 ; RV32-NEXT: vand.vx v18, v8, a0 -; RV32-NEXT: vand.vx v16, v16, a0 -; RV32-NEXT: vor.vv v14, v16, v14 -; RV32-NEXT: vlse64.v v16, (a6), zero ; RV32-NEXT: vsll.vx v18, v18, a4 -; RV32-NEXT: vor.vv v12, v12, v18 -; RV32-NEXT: vsrl.vi v18, v8, 8 -; RV32-NEXT: vand.vx v10, v10, a5 -; RV32-NEXT: vand.vv v18, v18, v16 -; RV32-NEXT: vor.vv v10, v18, v10 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vor.vv v14, v14, v18 +; RV32-NEXT: vlse64.v v18, (a6), zero +; RV32-NEXT: vand.vv v12, v12, v18 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vand.vv v12, v8, v18 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v10, v10, v14 +; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v14, v8 +; RV32-NEXT: vor.vv v10, v10, v16 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -940,29 +940,29 @@ define @vp_bswap_nxv4i64_unmasked( %va, i32 ; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsrl.vi v16, v8, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsll.vx v16, v8, a2 +; RV32-NEXT: vsll.vx v20, v8, a2 ; RV32-NEXT: addi a0, a3, -256 -; RV32-NEXT: vsrl.vx v20, v8, a2 -; RV32-NEXT: vsrl.vx v24, v8, a4 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: vsrl.vx v28, v8, a4 +; RV32-NEXT: vand.vx v12, v12, a5 +; RV32-NEXT: vand.vx v28, v28, a0 +; RV32-NEXT: vor.vv v24, v28, v24 ; RV32-NEXT: vand.vx v28, v8, a0 -; RV32-NEXT: vand.vx v24, v24, a0 -; RV32-NEXT: vor.vv v20, v24, v20 -; RV32-NEXT: vlse64.v v24, (a6), zero ; RV32-NEXT: vsll.vx v28, v28, a4 -; RV32-NEXT: vor.vv v16, v16, v28 -; RV32-NEXT: vsrl.vi v28, v8, 8 -; RV32-NEXT: vand.vx v12, v12, a5 -; RV32-NEXT: vand.vv v28, v28, v24 -; RV32-NEXT: vor.vv v12, v28, v12 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vor.vv v20, v20, v28 +; RV32-NEXT: vlse64.v v28, (a6), zero +; RV32-NEXT: vand.vv v16, v16, v28 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vand.vv v16, v8, v28 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v12, v12, v20 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v20, v8 +; RV32-NEXT: vor.vv v12, v12, v24 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -1022,59 +1022,61 @@ define @vp_bswap_nxv7i64( %va, @vp_bswap_nxv7i64_unmasked( %va, i32 ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: li a4, 40 ; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: addi a6, sp, 8 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v24, v8, a2 -; RV32-NEXT: addi a0, a3, -256 ; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: addi a2, a3, -256 ; RV32-NEXT: vsrl.vx v0, v8, a4 -; RV32-NEXT: vand.vx v0, v0, a0 +; RV32-NEXT: vand.vx v0, v0, a2 ; RV32-NEXT: 
vor.vv v16, v0, v16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v0, v8, a2 ; RV32-NEXT: vsll.vx v0, v0, a4 ; RV32-NEXT: vor.vv v16, v24, v0 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v0, (a6), zero -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a5 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: vand.vx v0, v0, a5 ; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: vand.vv v24, v8, v0 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 @@ -1292,59 +1295,61 @@ define @vp_bswap_nxv8i64( %va, @vp_bswap_nxv8i64_unmasked( %va, i32 ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: li a4, 40 ; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: addi a6, sp, 8 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v24, v8, a2 -; RV32-NEXT: addi a0, a3, -256 ; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: addi a2, a3, -256 ; RV32-NEXT: vsrl.vx v0, v8, a4 -; RV32-NEXT: vand.vx v0, v0, a0 +; RV32-NEXT: vand.vx v0, v0, a2 ; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a0 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v0, v8, a2 ; RV32-NEXT: vsll.vx v0, v0, a4 ; RV32-NEXT: vor.vv v16, v24, v0 -; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v0, (a6), zero -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a5 +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: vand.vx v0, v0, a5 ; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v0 -; 
RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: vand.vv v24, v8, v0 +; RV32-NEXT: vlse64.v v16, (a0), zero +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll index 15f6ca600cb37..b95bc73936059 100644 --- a/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/calling-conv-fastcc.ll @@ -75,17 +75,17 @@ define fastcc @ret_split_nxv64i32(ptr %x) { ; CHECK-NEXT: slli a4, a2, 5 ; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: sub a4, a4, a3 -; CHECK-NEXT: add a5, a1, a2 -; CHECK-NEXT: vl8re32.v v16, (a5) ; CHECK-NEXT: add a5, a1, a3 +; CHECK-NEXT: vl8re32.v v16, (a5) +; CHECK-NEXT: add a5, a1, a2 ; CHECK-NEXT: add a2, a0, a2 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: add a1, a1, a4 ; CHECK-NEXT: vl8re32.v v24, (a5) ; CHECK-NEXT: vl8re32.v v0, (a1) ; CHECK-NEXT: vs8r.v v8, (a0) -; CHECK-NEXT: vs8r.v v16, (a2) -; CHECK-NEXT: vs8r.v v24, (a3) +; CHECK-NEXT: vs8r.v v24, (a2) +; CHECK-NEXT: vs8r.v v16, (a3) ; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: vs8r.v v0, (a0) ; CHECK-NEXT: ret @@ -245,59 +245,21 @@ define fastcc @ret_nxv32i1_param_nxv32i1_nxv32i1( @ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32_i32( %x, %y, %z, i32 %w) { ; CHECK-LABEL: ret_nxv32i32_param_nxv32i32_nxv32i32_nxv32i32_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: vl8re32.v v8, (a2) -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re32.v v0, (a0) +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vsetvli a3, zero, e32, m8, ta, ma +; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vl8re32.v v24, (a2) ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a2, a2, a1 ; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: vl8re32.v v8, (a0) -; CHECK-NEXT: vl8re32.v v16, (a2) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # 
Unknown-size Folded Reload -; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vadd.vv v0, v24, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v24, v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v8, v0, v8 -; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: vadd.vx v16, v8, a4 -; CHECK-NEXT: vadd.vx v8, v24, a4 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: vadd.vv v8, v8, v24 +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vadd.vv v16, v16, v24 +; CHECK-NEXT: vl8re32.v v24, (a2) +; CHECK-NEXT: vadd.vv v16, v16, v24 +; CHECK-NEXT: vadd.vx v16, v16, a4 +; CHECK-NEXT: vadd.vx v8, v8, a4 ; CHECK-NEXT: ret %r = add %x, %y %s = add %r, %z @@ -325,19 +287,19 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ret_nxv32i32_call_nxv32i32_nxv32i32_i32( @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_ ; RV32-NEXT: andi sp, sp, -128 ; RV32-NEXT: addi a1, sp, 128 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV32-NEXT: vmv8r.v v0, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: vl8re32.v v16, (a2) +; RV32-NEXT: vl8re32.v v8, (a2) ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 128 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a2, a2, a1 ; RV32-NEXT: add a3, a0, a1 -; RV32-NEXT: vl8re32.v v0, (a2) -; RV32-NEXT: vl8re32.v v24, (a3) -; RV32-NEXT: vl8re32.v v16, (a0) +; RV32-NEXT: vl8re32.v v24, (a2) +; RV32-NEXT: vl8re32.v v16, (a3) +; RV32-NEXT: vl8re32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vs8r.v v8, (a0) -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 128 -; RV32-NEXT: vs8r.v v16, (a3) +; RV32-NEXT: vs8r.v v0, (a0) +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 128 +; RV32-NEXT: vs8r.v v8, (a2) ; RV32-NEXT: add a0, a0, a1 -; RV32-NEXT: addi a2, sp, 128 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: addi a3, sp, 128 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vs8r.v v8, (a0) +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: vs8r.v v16, (a1) ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 @@ -445,16 +411,13 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_ ; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 128 -; RV32-NEXT: add a1, a3, a1 ; RV32-NEXT: li a5, 42 -; RV32-NEXT: vs8r.v v24, (a1) ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 128 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV32-NEXT: vmv8r.v v16, v0 +; RV32-NEXT: vmv8r.v v16, v24 ; RV32-NEXT: call ext3 ; RV32-NEXT: addi sp, s0, -144 ; RV32-NEXT: .cfi_def_cfa sp, 144 @@ -483,33 +446,37 @@ define 
fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_ ; RV64-NEXT: andi sp, sp, -128 ; RV64-NEXT: addi a1, sp, 128 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; RV64-NEXT: vmv8r.v v0, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: vl8re32.v v16, (a2) +; RV64-NEXT: vl8re32.v v8, (a2) ; RV64-NEXT: csrr a3, vlenb ; RV64-NEXT: slli a3, a3, 3 ; RV64-NEXT: add a3, sp, a3 ; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a2, a2, a1 ; RV64-NEXT: add a3, a0, a1 -; RV64-NEXT: vl8re32.v v0, (a2) -; RV64-NEXT: vl8re32.v v24, (a3) -; RV64-NEXT: vl8re32.v v16, (a0) +; RV64-NEXT: vl8re32.v v24, (a2) +; RV64-NEXT: vl8re32.v v16, (a3) +; RV64-NEXT: vl8re32.v v8, (a0) ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vs8r.v v8, (a0) -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 5 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 128 -; RV64-NEXT: vs8r.v v16, (a3) +; RV64-NEXT: vs8r.v v0, (a0) +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: slli a2, a2, 5 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 128 +; RV64-NEXT: vs8r.v v8, (a2) ; RV64-NEXT: add a0, a0, a1 -; RV64-NEXT: addi a2, sp, 128 -; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV64-NEXT: addi a3, sp, 128 +; RV64-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vs8r.v v8, (a0) +; RV64-NEXT: add a1, a2, a1 +; RV64-NEXT: vs8r.v v16, (a1) ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 5 ; RV64-NEXT: add a0, sp, a0 @@ -518,16 +485,13 @@ define fastcc @ret_nxv32i32_call_nxv32i32_nxv32i32_nxv32i32_ ; RV64-NEXT: slli a2, a2, 4 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 128 -; RV64-NEXT: add a1, a3, a1 ; RV64-NEXT: li a5, 42 -; RV64-NEXT: vs8r.v v24, (a1) ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 128 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64-NEXT: vmv8r.v v16, v0 +; RV64-NEXT: vmv8r.v v16, v24 ; RV64-NEXT: call ext3 ; RV64-NEXT: addi sp, s0, -144 ; RV64-NEXT: .cfi_def_cfa sp, 144 @@ -551,11 +515,11 @@ define fastcc @vector_arg_indirect_stack(i32 %0, i32 %1, i32 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, t5, a0 -; CHECK-NEXT: vl8re32.v v24, (t5) -; CHECK-NEXT: vl8re32.v v0, (a0) +; CHECK-NEXT: vl8re32.v v24, (a0) +; CHECK-NEXT: vl8re32.v v0, (t5) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vadd.vv v8, v8, v24 -; CHECK-NEXT: vadd.vv v16, v16, v0 +; CHECK-NEXT: vadd.vv v8, v8, v0 +; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: ret %s = add %x, %z ret %s @@ -608,8 +572,8 @@ define fastcc @pass_vector_arg_indirect_stack( @pass_vector_arg_indirect_stack( @pass_vector_arg_indirect_stack_no_gpr( @pass_vector_arg_indirect_stack_no_gpr( @callee_scalable_vector_split_indirect( %x, %y ret %a @@ -41,9 +41,9 @@ define @caller_scalable_vector_split_indirect( @caller_scalable_vector_split_indirect( @vp_ceil_vv_nxv1bf16( %va, @vp_ceil_vv_nxv1bf16_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 
; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -76,12 +76,12 @@ define @vp_ceil_vv_nxv2bf16( %va, @vp_ceil_vv_nxv2bf16_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -130,12 +130,12 @@ define @vp_ceil_vv_nxv4bf16( %va, @vp_ceil_vv_nxv4bf16_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -184,12 +184,12 @@ define @vp_ceil_vv_nxv8bf16( %va, @vp_ceil_vv_nxv8bf16_unmasked( ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -238,12 +238,12 @@ define @vp_ceil_vv_nxv16bf16( %va, ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t @@ -265,11 +265,11 @@ define @vp_ceil_vv_nxv16bf16_unmasked( @vp_ceil_vv_nxv32bf16( %va, ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: lui a3, 307200 +; CHECK-NEXT: fsrmi a4, 3 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 @@ -315,11 +316,10 @@ define @vp_ceil_vv_nxv32bf16( %va, ; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v18, v8, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 3 ; CHECK-NEXT: vmv1r.v v0, v18 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a4 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t @@ -336,11 +336,11 @@ define @vp_ceil_vv_nxv32bf16( %va, ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t ; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; 
CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t @@ -375,11 +375,12 @@ define @vp_ceil_vv_nxv32bf16_unmasked( @vp_ceil_vv_nxv32bf16_unmasked( @vp_ceil_vv_nxv32bf16_unmasked( @llvm.vp.ceil.nxv1f16(, @vp_ceil_vv_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a0) +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -461,12 +461,12 @@ define @vp_ceil_vv_nxv1f16( %va, @vp_ceil_vv_nxv1f16( %va, @vp_ceil_vv_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -503,11 +503,11 @@ define @vp_ceil_vv_nxv1f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -525,13 +525,13 @@ declare @llvm.vp.ceil.nxv2f16(, @vp_ceil_vv_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI14_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a0) +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -547,12 +547,12 @@ define @vp_ceil_vv_nxv2f16( %va, @vp_ceil_vv_nxv2f16( %va, @vp_ceil_vv_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI15_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -589,11 +589,11 @@ define @vp_ceil_vv_nxv2f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli 
zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -611,13 +611,13 @@ declare @llvm.vp.ceil.nxv4f16(, @vp_ceil_vv_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a0) +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -633,12 +633,12 @@ define @vp_ceil_vv_nxv4f16( %va, @vp_ceil_vv_nxv4f16( %va, @vp_ceil_vv_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -675,11 +675,11 @@ define @vp_ceil_vv_nxv4f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -699,12 +699,12 @@ define @vp_ceil_vv_nxv8f16( %va, @vp_ceil_vv_nxv8f16( %va, @vp_ceil_vv_nxv8f16( %va, @vp_ceil_vv_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI19_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -763,11 +763,11 @@ define @vp_ceil_vv_nxv8f16_unmasked( %va, ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -787,12 +787,12 @@ define @vp_ceil_vv_nxv16f16( %va, @vp_ceil_vv_nxv16f16( %va, @vp_ceil_vv_nxv16f16( %va, @vp_ceil_vv_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) 
; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -851,11 +851,11 @@ define @vp_ceil_vv_nxv16f16_unmasked( % ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -875,12 +875,12 @@ define @vp_ceil_vv_nxv32f16( %va, @vp_ceil_vv_nxv32f16( %va, @vp_ceil_vv_nxv32f16( %va, @vp_ceil_vv_nxv32f16( %va, @vp_ceil_vv_nxv32f16( %va, @vp_ceil_vv_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_vv_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -995,11 +995,12 @@ define @vp_ceil_vv_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 +; ZVFHMIN-NEXT: fsrmi a4, 3 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: vmv1r.v v17, v16 @@ -1014,11 +1015,10 @@ define @vp_ceil_vv_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a2, 3 ; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t -; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: fsrm a4 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t @@ -1033,10 +1033,10 @@ define @vp_ceil_vv_nxv32f16_unmasked( % ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1064,9 +1064,9 @@ define @vp_ceil_vv_nxv1f32( %va, @vp_ceil_vv_nxv1f32_unmasked( %v ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ 
-1106,9 +1106,9 @@ define @vp_ceil_vv_nxv2f32( %va, @vp_ceil_vv_nxv2f32_unmasked( %v ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1149,9 +1149,9 @@ define @vp_ceil_vv_nxv4f32( %va, @vp_ceil_vv_nxv4f32_unmasked( %v ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1193,9 +1193,9 @@ define @vp_ceil_vv_nxv8f32( %va, @vp_ceil_vv_nxv8f32_unmasked( %v ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1237,9 +1237,9 @@ define @vp_ceil_vv_nxv16f32( %va, @vp_ceil_vv_nxv16f32_unmasked( ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1276,13 +1276,13 @@ declare @llvm.vp.ceil.nxv1f64(, @vp_ceil_vv_nxv1f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -1297,12 +1297,12 @@ define @vp_ceil_vv_nxv1f64( %va, @vp_ceil_vv_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1320,12 +1320,12 @@ define @vp_ceil_vv_nxv2f64( %va, @vp_ceil_vv_nxv2f64( %va, @vp_ceil_vv_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1364,12 +1364,12 @@ define @vp_ceil_vv_nxv4f64( %va, @vp_ceil_vv_nxv4f64( %va, @vp_ceil_vv_nxv4f64_unmasked( %va, i32 zeroext 
%evl) { ; CHECK-LABEL: vp_ceil_vv_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1408,12 +1408,12 @@ define @vp_ceil_vv_nxv7f64( %va, @vp_ceil_vv_nxv7f64( %va, @vp_ceil_vv_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1452,12 +1452,12 @@ define @vp_ceil_vv_nxv8f64( %va, @vp_ceil_vv_nxv8f64( %va, @vp_ceil_vv_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_vv_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1498,59 +1498,66 @@ define @vp_ceil_vv_nxv16f64( %va, < ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v25, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: fsrmi a3, 3 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 3 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v 
v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: fsrm a3 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 3 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -1570,12 +1577,12 @@ define @vp_ceil_vv_nxv16f64_unmasked( @vp_ceil_vv_nxv16f64_unmasked(, ptr %a %v2 = load <2 x i8>, ptr %b @@ -68,13 +68,13 @@ define void @v4xi8_concat_vector_insert_idx3(ptr %a, ptr %b, i8 %x) { ; CHECK-LABEL: v4xi8_concat_vector_insert_idx3: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; CHECK-NEXT: vle8.v v8, (a1) -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vle8.v v9, (a1) ; CHECK-NEXT: vmv.s.x v10, a2 -; CHECK-NEXT: vslideup.vi v8, v10, 1 +; CHECK-NEXT: vslideup.vi v9, v10, 1 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v9, v8, 2 -; CHECK-NEXT: vse8.v v9, (a0) +; CHECK-NEXT: vslideup.vi v8, v9, 2 +; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %v1 = load <2 x i8>, ptr %a %v2 = load <2 x i8>, ptr %b @@ -156,26 +156,26 @@ define void @v4xi64_concat_vector_insert_idx2(ptr %a, ptr %b, i64 %x) { ; RV32-LABEL: v4xi64_concat_vector_insert_idx2: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vle64.v v8, (a1) -; RV32-NEXT: vle64.v v10, (a0) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vle64.v v10, (a1) ; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 +; RV32-NEXT: vslide1down.vx v10, v10, a2 +; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vslideup.vi v10, v8, 2 -; RV32-NEXT: vse64.v v10, (a0) +; RV32-NEXT: vslideup.vi v8, v10, 2 +; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: v4xi64_concat_vector_insert_idx2: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vle64.v v8, (a1) -; RV64-NEXT: vle64.v v10, (a0) +; RV64-NEXT: 
vle64.v v8, (a0) +; RV64-NEXT: vle64.v v10, (a1) ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; RV64-NEXT: vmv.s.x v8, a2 +; RV64-NEXT: vmv.s.x v10, a2 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vslideup.vi v10, v8, 2 -; RV64-NEXT: vse64.v v10, (a0) +; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: ret %v1 = load <2 x i64>, ptr %a %v2 = load <2 x i64>, ptr %b @@ -189,28 +189,28 @@ define void @v4xi64_concat_vector_insert_idx3(ptr %a, ptr %b, i64 %x) { ; RV32-LABEL: v4xi64_concat_vector_insert_idx3: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vle64.v v8, (a1) -; RV32-NEXT: vle64.v v10, (a0) +; RV32-NEXT: vle64.v v8, (a0) +; RV32-NEXT: vle64.v v10, (a1) ; RV32-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV32-NEXT: vslide1down.vx v9, v8, a2 ; RV32-NEXT: vslide1down.vx v9, v9, a3 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vslideup.vi v8, v9, 1 +; RV32-NEXT: vslideup.vi v10, v9, 1 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vslideup.vi v10, v8, 2 -; RV32-NEXT: vse64.v v10, (a0) +; RV32-NEXT: vslideup.vi v8, v10, 2 +; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; ; RV64-LABEL: v4xi64_concat_vector_insert_idx3: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vle64.v v8, (a1) -; RV64-NEXT: vle64.v v10, (a0) +; RV64-NEXT: vle64.v v8, (a0) +; RV64-NEXT: vle64.v v10, (a1) ; RV64-NEXT: vmv.s.x v9, a2 -; RV64-NEXT: vslideup.vi v8, v9, 1 +; RV64-NEXT: vslideup.vi v10, v9, 1 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vslideup.vi v10, v8, 2 -; RV64-NEXT: vse64.v v10, (a0) +; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: vse64.v v8, (a0) ; RV64-NEXT: ret %v1 = load <2 x i64>, ptr %a %v2 = load <2 x i64>, ptr %b diff --git a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll index f6c26bbba89fe..d470b8b9bff18 100644 --- a/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/constant-folding-crash.ll @@ -31,13 +31,12 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan ; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV32-NEXT: vmv.v.i v11, 10 +; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vrgather.vi v10, v9, 0 -; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vmerge.vim v8, v9, 1, v0 +; RV32-NEXT: vrgather.vi v9, v8, 0 +; RV32-NEXT: vmsne.vi v0, v9, 0 ; RV32-NEXT: vse32.v v11, (a0), v0.t ; RV32-NEXT: ret ; @@ -56,13 +55,13 @@ define void @constant_folding_crash(ptr %v54, <4 x ptr> %lanes.a, <4 x ptr> %lan ; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64-NEXT: vmv.v.i v10, 10 -; RV64-NEXT: vmv1r.v v0, v12 -; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; RV64-NEXT: vmerge.vim v9, v9, 1, v0 -; RV64-NEXT: vrgather.vi v11, v9, 0 -; RV64-NEXT: vmsne.vi v0, v11, 0 ; RV64-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV64-NEXT: vmv.x.s a0, v8 +; RV64-NEXT: vmv1r.v v0, v12 +; RV64-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; RV64-NEXT: vmerge.vim v8, v9, 1, v0 +; RV64-NEXT: vrgather.vi v9, v8, 0 +; RV64-NEXT: vmsne.vi v0, v9, 0 ; RV64-NEXT: vse32.v v10, (a0), v0.t ; RV64-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir 
b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir index 0b905b57f92b8..ce78f5a367d01 100644 --- a/llvm/test/CodeGen/RISCV/rvv/copyprop.mir +++ b/llvm/test/CodeGen/RISCV/rvv/copyprop.mir @@ -6,9 +6,9 @@ ; CHECK-LABEL: foo: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma + ; CHECK-NEXT: vsll.vi v9, v8, 5 ; CHECK-NEXT: vmsne.vi v0, v8, 0 - ; CHECK-NEXT: vsll.vi v8, v8, 5 - ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 + ; CHECK-NEXT: vmerge.vim v8, v9, -1, v0 ; CHECK-NEXT: sf.vc.v.x 3, 31, v9, a1 ; CHECK-NEXT: bgeu a0, zero, .LBB0_3 ; CHECK-NEXT: # %bb.1: # %entry diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll index 208735b18cbab..024e976d8880c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-sdnode.ll @@ -809,12 +809,12 @@ define @ctlz_nxv1i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: li a1, 158 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vrsub.vx v8, v8, a1 -; CHECK-F-NEXT: li a1, 32 -; CHECK-F-NEXT: vminu.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 158 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: li a0, 32 +; CHECK-F-NEXT: vminu.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv1i32: @@ -881,12 +881,12 @@ define @ctlz_nxv2i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: li a1, 158 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vrsub.vx v8, v8, a1 -; CHECK-F-NEXT: li a1, 32 -; CHECK-F-NEXT: vminu.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 158 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: li a0, 32 +; CHECK-F-NEXT: vminu.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv2i32: @@ -953,12 +953,12 @@ define @ctlz_nxv4i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: li a1, 158 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vrsub.vx v8, v8, a1 -; CHECK-F-NEXT: li a1, 32 -; CHECK-F-NEXT: vminu.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 158 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: li a0, 32 +; CHECK-F-NEXT: vminu.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv4i32: @@ -1025,12 +1025,12 @@ define @ctlz_nxv8i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: li a1, 158 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vrsub.vx v8, v8, a1 -; CHECK-F-NEXT: li a1, 32 -; CHECK-F-NEXT: vminu.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 158 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: li a0, 32 +; CHECK-F-NEXT: vminu.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv8i32: @@ -1097,12 +1097,12 @@ define @ctlz_nxv16i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: li a1, 158 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vrsub.vx v8, v8, a1 -; CHECK-F-NEXT: li a1, 32 -; CHECK-F-NEXT: vminu.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 158 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: 
vrsub.vx v8, v8, a0 +; CHECK-F-NEXT: li a0, 32 +; CHECK-F-NEXT: vminu.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv16i32: @@ -1110,12 +1110,12 @@ define @ctlz_nxv16i32( %va) { ; CHECK-D-NEXT: fsrmi a0, 1 ; CHECK-D-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: li a1, 158 -; CHECK-D-NEXT: vsrl.vi v8, v8, 23 -; CHECK-D-NEXT: vrsub.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 32 -; CHECK-D-NEXT: vminu.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 158 +; CHECK-D-NEXT: vsrl.vi v8, v8, 23 +; CHECK-D-NEXT: vrsub.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 32 +; CHECK-D-NEXT: vminu.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctlz_nxv16i32: @@ -1232,16 +1232,16 @@ define @ctlz_nxv1i64( %va) { ; CHECK-F-LABEL: ctlz_nxv1i64: ; CHECK-F: # %bb.0: ; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: fsrmi a1, 1 -; CHECK-F-NEXT: vsetvli a2, zero, e32, mf2, ta, ma -; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8 -; CHECK-F-NEXT: vmv.v.x v8, a0 -; CHECK-F-NEXT: vsrl.vi v9, v9, 23 -; CHECK-F-NEXT: vwsubu.vv v10, v8, v9 +; CHECK-F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vmv.v.x v9, a0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v10, 23 +; CHECK-F-NEXT: vwsubu.vv v10, v9, v8 ; CHECK-F-NEXT: li a0, 64 ; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-F-NEXT: vminu.vx v8, v10, a0 -; CHECK-F-NEXT: fsrm a1 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv1i64: @@ -1249,13 +1249,13 @@ define @ctlz_nxv1i64( %va) { ; CHECK-D-NEXT: fsrmi a0, 1 ; CHECK-D-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: li a1, 52 -; CHECK-D-NEXT: vsrl.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 1086 -; CHECK-D-NEXT: vrsub.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 64 -; CHECK-D-NEXT: vminu.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 1086 +; CHECK-D-NEXT: vrsub.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 64 +; CHECK-D-NEXT: vminu.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctlz_nxv1i64: @@ -1372,16 +1372,16 @@ define @ctlz_nxv2i64( %va) { ; CHECK-F-LABEL: ctlz_nxv2i64: ; CHECK-F: # %bb.0: ; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: fsrmi a1, 1 -; CHECK-F-NEXT: vsetvli a2, zero, e32, m1, ta, ma -; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8 -; CHECK-F-NEXT: vmv.v.x v8, a0 -; CHECK-F-NEXT: vsrl.vi v9, v10, 23 -; CHECK-F-NEXT: vwsubu.vv v10, v8, v9 +; CHECK-F-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-F-NEXT: vmv.v.x v10, a0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfncvt.f.xu.w v11, v8 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v11, 23 +; CHECK-F-NEXT: vwsubu.vv v12, v10, v8 ; CHECK-F-NEXT: li a0, 64 ; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-F-NEXT: vminu.vx v8, v10, a0 -; CHECK-F-NEXT: fsrm a1 +; CHECK-F-NEXT: vminu.vx v8, v12, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv2i64: @@ -1389,13 +1389,13 @@ define @ctlz_nxv2i64( %va) { ; CHECK-D-NEXT: fsrmi a0, 1 ; CHECK-D-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: li a1, 52 -; CHECK-D-NEXT: vsrl.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 1086 -; CHECK-D-NEXT: vrsub.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 64 -; CHECK-D-NEXT: vminu.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 1086 +; CHECK-D-NEXT: vrsub.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 64 +; CHECK-D-NEXT: 
vminu.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctlz_nxv2i64: @@ -1512,16 +1512,16 @@ define @ctlz_nxv4i64( %va) { ; CHECK-F-LABEL: ctlz_nxv4i64: ; CHECK-F: # %bb.0: ; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: fsrmi a1, 1 -; CHECK-F-NEXT: vsetvli a2, zero, e32, m2, ta, ma -; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8 -; CHECK-F-NEXT: vmv.v.x v8, a0 -; CHECK-F-NEXT: vsrl.vi v10, v12, 23 -; CHECK-F-NEXT: vwsubu.vv v12, v8, v10 +; CHECK-F-NEXT: vsetvli a1, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vmv.v.x v12, a0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfncvt.f.xu.w v14, v8 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v14, 23 +; CHECK-F-NEXT: vwsubu.vv v16, v12, v8 ; CHECK-F-NEXT: li a0, 64 ; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-F-NEXT: vminu.vx v8, v12, a0 -; CHECK-F-NEXT: fsrm a1 +; CHECK-F-NEXT: vminu.vx v8, v16, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv4i64: @@ -1529,13 +1529,13 @@ define @ctlz_nxv4i64( %va) { ; CHECK-D-NEXT: fsrmi a0, 1 ; CHECK-D-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: li a1, 52 -; CHECK-D-NEXT: vsrl.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 1086 -; CHECK-D-NEXT: vrsub.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 64 -; CHECK-D-NEXT: vminu.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 1086 +; CHECK-D-NEXT: vrsub.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 64 +; CHECK-D-NEXT: vminu.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctlz_nxv4i64: @@ -1652,16 +1652,16 @@ define @ctlz_nxv8i64( %va) { ; CHECK-F-LABEL: ctlz_nxv8i64: ; CHECK-F: # %bb.0: ; CHECK-F-NEXT: li a0, 190 -; CHECK-F-NEXT: fsrmi a1, 1 -; CHECK-F-NEXT: vsetvli a2, zero, e32, m4, ta, ma -; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8 -; CHECK-F-NEXT: vmv.v.x v8, a0 -; CHECK-F-NEXT: vsrl.vi v12, v16, 23 -; CHECK-F-NEXT: vwsubu.vv v16, v8, v12 +; CHECK-F-NEXT: vsetvli a1, zero, e32, m4, ta, ma +; CHECK-F-NEXT: vmv.v.x v16, a0 +; CHECK-F-NEXT: fsrmi a0, 1 +; CHECK-F-NEXT: vfncvt.f.xu.w v20, v8 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v20, 23 +; CHECK-F-NEXT: vwsubu.vv v24, v16, v8 ; CHECK-F-NEXT: li a0, 64 ; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-F-NEXT: vminu.vx v8, v16, a0 -; CHECK-F-NEXT: fsrm a1 +; CHECK-F-NEXT: vminu.vx v8, v24, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_nxv8i64: @@ -1669,13 +1669,13 @@ define @ctlz_nxv8i64( %va) { ; CHECK-D-NEXT: fsrmi a0, 1 ; CHECK-D-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: li a1, 52 -; CHECK-D-NEXT: vsrl.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 1086 -; CHECK-D-NEXT: vrsub.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 64 -; CHECK-D-NEXT: vminu.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 1086 +; CHECK-D-NEXT: vrsub.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 64 +; CHECK-D-NEXT: vminu.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctlz_nxv8i64: @@ -2436,10 +2436,10 @@ define @ctlz_zero_undef_nxv1i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vsetvli a1, zero, e32, mf2, ta, ma ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: li a1, 158 -; CHECK-F-NEXT: vrsub.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a0, 158 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv1i32: @@ -2503,10 +2503,10 @@ define 
@ctlz_zero_undef_nxv2i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vsetvli a1, zero, e32, m1, ta, ma ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: li a1, 158 -; CHECK-F-NEXT: vrsub.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a0, 158 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv2i32: @@ -2570,10 +2570,10 @@ define @ctlz_zero_undef_nxv4i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vsetvli a1, zero, e32, m2, ta, ma ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: li a1, 158 -; CHECK-F-NEXT: vrsub.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a0, 158 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i32: @@ -2637,10 +2637,10 @@ define @ctlz_zero_undef_nxv8i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vsetvli a1, zero, e32, m4, ta, ma ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: li a1, 158 -; CHECK-F-NEXT: vrsub.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a0, 158 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv8i32: @@ -2704,10 +2704,10 @@ define @ctlz_zero_undef_nxv16i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: li a1, 158 -; CHECK-F-NEXT: vrsub.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a0, 158 +; CHECK-F-NEXT: vrsub.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv16i32: @@ -2715,10 +2715,10 @@ define @ctlz_zero_undef_nxv16i32( %va) { ; CHECK-D-NEXT: fsrmi a0, 1 ; CHECK-D-NEXT: vsetvli a1, zero, e32, m8, ta, ma ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: vsrl.vi v8, v8, 23 -; CHECK-D-NEXT: li a1, 158 -; CHECK-D-NEXT: vrsub.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: vsrl.vi v8, v8, 23 +; CHECK-D-NEXT: li a0, 158 +; CHECK-D-NEXT: vrsub.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv16i32: @@ -2838,9 +2838,9 @@ define @ctlz_zero_undef_nxv1i64( %va) { ; CHECK-F-NEXT: vmv.v.x v9, a0 ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8 +; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: vsrl.vi v10, v10, 23 ; CHECK-F-NEXT: vwsubu.vv v8, v9, v10 -; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv1i64: @@ -2848,11 +2848,11 @@ define @ctlz_zero_undef_nxv1i64( %va) { ; CHECK-D-NEXT: fsrmi a0, 1 ; CHECK-D-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: li a1, 52 -; CHECK-D-NEXT: vsrl.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 1086 -; CHECK-D-NEXT: vrsub.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 1086 +; CHECK-D-NEXT: vrsub.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv1i64: @@ -2972,9 +2972,9 @@ define @ctlz_zero_undef_nxv2i64( %va) { ; CHECK-F-NEXT: vmv.v.x v10, a0 ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vfncvt.f.xu.w v11, v8 +; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: vsrl.vi v11, v11, 23 ; CHECK-F-NEXT: vwsubu.vv v8, v10, v11 -; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv2i64: @@ -2982,11 
+2982,11 @@ define @ctlz_zero_undef_nxv2i64( %va) { ; CHECK-D-NEXT: fsrmi a0, 1 ; CHECK-D-NEXT: vsetvli a1, zero, e64, m2, ta, ma ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: li a1, 52 -; CHECK-D-NEXT: vsrl.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 1086 -; CHECK-D-NEXT: vrsub.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 1086 +; CHECK-D-NEXT: vrsub.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv2i64: @@ -3106,9 +3106,9 @@ define @ctlz_zero_undef_nxv4i64( %va) { ; CHECK-F-NEXT: vmv.v.x v12, a0 ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vfncvt.f.xu.w v14, v8 +; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: vsrl.vi v14, v14, 23 ; CHECK-F-NEXT: vwsubu.vv v8, v12, v14 -; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv4i64: @@ -3116,11 +3116,11 @@ define @ctlz_zero_undef_nxv4i64( %va) { ; CHECK-D-NEXT: fsrmi a0, 1 ; CHECK-D-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: li a1, 52 -; CHECK-D-NEXT: vsrl.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 1086 -; CHECK-D-NEXT: vrsub.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 1086 +; CHECK-D-NEXT: vrsub.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv4i64: @@ -3240,9 +3240,9 @@ define @ctlz_zero_undef_nxv8i64( %va) { ; CHECK-F-NEXT: vmv.v.x v16, a0 ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vfncvt.f.xu.w v20, v8 +; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: vsrl.vi v20, v20, 23 ; CHECK-F-NEXT: vwsubu.vv v8, v16, v20 -; CHECK-F-NEXT: fsrm a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: ctlz_zero_undef_nxv8i64: @@ -3250,11 +3250,11 @@ define @ctlz_zero_undef_nxv8i64( %va) { ; CHECK-D-NEXT: fsrmi a0, 1 ; CHECK-D-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: li a1, 52 -; CHECK-D-NEXT: vsrl.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 1086 -; CHECK-D-NEXT: vrsub.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 1086 +; CHECK-D-NEXT: vrsub.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: ctlz_zero_undef_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll index 6f515996677ee..39582ee3dacae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll @@ -940,12 +940,12 @@ define @vp_ctlz_nxv16i32( %va, @vp_ctlz_nxv16i32_unmasked( %va, i ; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: li a0, 158 ; CHECK-NEXT: vsrl.vi v8, v8, 23 ; CHECK-NEXT: vrsub.vx v8, v8, a0 ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: vminu.vx v8, v8, a0 -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i32_unmasked: @@ -988,13 +988,13 @@ define @vp_ctlz_nxv1i64( %va, @vp_ctlz_nxv1i64_unmasked( %va, i32 ; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: li a0, 52 ; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: li a0, 1086 ; CHECK-NEXT: vrsub.vx v8, v8, a0 ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vminu.vx v8, v8, a0 -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i64_unmasked: @@ -1038,13 +1038,13 @@ define @vp_ctlz_nxv2i64( %va, @vp_ctlz_nxv2i64_unmasked( %va, i32 ; CHECK-NEXT: fsrmi a1, 1 ; 
CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: li a0, 52 ; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: li a0, 1086 ; CHECK-NEXT: vrsub.vx v8, v8, a0 ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vminu.vx v8, v8, a0 -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv2i64_unmasked: @@ -1088,13 +1088,13 @@ define @vp_ctlz_nxv4i64( %va, @vp_ctlz_nxv4i64_unmasked( %va, i32 ; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: li a0, 52 ; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: li a0, 1086 ; CHECK-NEXT: vrsub.vx v8, v8, a0 ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vminu.vx v8, v8, a0 -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv4i64_unmasked: @@ -1138,13 +1138,13 @@ define @vp_ctlz_nxv7i64( %va, @vp_ctlz_nxv7i64_unmasked( %va, i32 ; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: li a0, 52 ; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: li a0, 1086 ; CHECK-NEXT: vrsub.vx v8, v8, a0 ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vminu.vx v8, v8, a0 -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv7i64_unmasked: @@ -1188,13 +1188,13 @@ define @vp_ctlz_nxv8i64( %va, @vp_ctlz_nxv8i64_unmasked( %va, i32 ; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: li a0, 52 ; CHECK-NEXT: vsrl.vx v8, v8, a0 ; CHECK-NEXT: li a0, 1086 ; CHECK-NEXT: vrsub.vx v8, v8, a0 ; CHECK-NEXT: li a0, 64 ; CHECK-NEXT: vminu.vx v8, v8, a0 -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv8i64_unmasked: @@ -1258,14 +1258,14 @@ define @vp_ctlz_nxv16i64( %va, @vp_ctlz_nxv16i64_unmasked( %va, i ; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vsrl.vx v8, v8, a2 ; CHECK-NEXT: vrsub.vx v8, v8, a3 ; CHECK-NEXT: vminu.vx v8, v8, a4 -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_nxv16i64_unmasked: @@ -2201,10 +2201,10 @@ define @vp_ctlz_zero_undef_nxv16i32( %va, ; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: li a0, 158 ; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t ; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i32: @@ -2222,10 +2222,10 @@ define @vp_ctlz_zero_undef_nxv16i32_unmasked( @vp_ctlz_zero_undef_nxv1i64( %va, @vp_ctlz_zero_undef_nxv1i64_unmasked( @vp_ctlz_zero_undef_nxv2i64( %va, @vp_ctlz_zero_undef_nxv2i64_unmasked( @vp_ctlz_zero_undef_nxv4i64( %va, @vp_ctlz_zero_undef_nxv4i64_unmasked( @vp_ctlz_zero_undef_nxv7i64( %va, @vp_ctlz_zero_undef_nxv7i64_unmasked( @vp_ctlz_zero_undef_nxv8i64( %va, @vp_ctlz_zero_undef_nxv8i64_unmasked( @vp_ctlz_zero_undef_nxv16i64( %va, ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB94_2: -; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t ; CHECK-NEXT: vrsub.vx v8, v8, a3, v0.t -; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv16i64: @@ -2543,9 +2543,9 
@@ define @vp_ctlz_zero_undef_nxv16i64_unmasked( @cttz_nxv1i8( %va) { ; CHECK-F-NEXT: vnsrl.wi v9, v9, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-F-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-F-NEXT: vsub.vx v9, v9, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v9, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv1i8: @@ -59,9 +59,9 @@ define @cttz_nxv1i8( %va) { ; CHECK-D-NEXT: vnsrl.wi v9, v9, 23 ; CHECK-D-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; CHECK-D-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-D-NEXT: vsub.vx v9, v9, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv1i8: @@ -108,9 +108,9 @@ define @cttz_nxv2i8( %va) { ; CHECK-F-NEXT: vnsrl.wi v9, v9, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-F-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-F-NEXT: vsub.vx v9, v9, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v9, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv2i8: @@ -125,9 +125,9 @@ define @cttz_nxv2i8( %va) { ; CHECK-D-NEXT: vnsrl.wi v9, v9, 23 ; CHECK-D-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-D-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-D-NEXT: vsub.vx v9, v9, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv2i8: @@ -174,9 +174,9 @@ define @cttz_nxv4i8( %va) { ; CHECK-F-NEXT: vnsrl.wi v9, v12, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-F-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-F-NEXT: vsub.vx v9, v9, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v9, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv4i8: @@ -191,9 +191,9 @@ define @cttz_nxv4i8( %va) { ; CHECK-D-NEXT: vnsrl.wi v9, v12, 23 ; CHECK-D-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-D-NEXT: vnsrl.wi v9, v9, 0 +; CHECK-D-NEXT: vsub.vx v9, v9, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv4i8: @@ -240,9 +240,9 @@ define @cttz_nxv8i8( %va) { ; CHECK-F-NEXT: vnsrl.wi v10, v12, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-F-NEXT: vnsrl.wi v9, v10, 0 +; CHECK-F-NEXT: vsub.vx v9, v9, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v9, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv8i8: @@ -257,9 +257,9 @@ define @cttz_nxv8i8( %va) { ; CHECK-D-NEXT: vnsrl.wi v10, v12, 23 ; CHECK-D-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-D-NEXT: vnsrl.wi v9, v10, 0 +; CHECK-D-NEXT: vsub.vx v9, v9, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v9, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv8i8: @@ -306,9 +306,9 @@ define @cttz_nxv16i8( %va) { ; CHECK-F-NEXT: vnsrl.wi v12, v16, 23 ; CHECK-F-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-F-NEXT: vnsrl.wi v10, v12, 0 +; CHECK-F-NEXT: vsub.vx v10, 
v10, a0 ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vsub.vx v8, v10, a0 -; CHECK-F-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-F-NEXT: vmerge.vim v8, v10, 8, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv16i8: @@ -323,9 +323,9 @@ define @cttz_nxv16i8( %va) { ; CHECK-D-NEXT: vnsrl.wi v12, v16, 23 ; CHECK-D-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-D-NEXT: vnsrl.wi v10, v12, 0 +; CHECK-D-NEXT: vsub.vx v10, v10, a0 ; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v10, a0 -; CHECK-D-NEXT: vmerge.vim v8, v8, 8, v0 +; CHECK-D-NEXT: vmerge.vim v8, v10, 8, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv16i8: @@ -811,15 +811,15 @@ define @cttz_nxv1i32( %va) { ; CHECK-F-NEXT: vsetvli a0, zero, e32, mf2, ta, ma ; CHECK-F-NEXT: vrsub.vi v9, v8, 0 ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: li a1, 127 ; CHECK-F-NEXT: vand.vv v9, v8, v9 -; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vfcvt.f.xu.v v8, v9 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vsub.vx v8, v8, a1 -; CHECK-F-NEXT: li a1, 32 -; CHECK-F-NEXT: vmerge.vxm v8, v8, a1, v0 +; CHECK-F-NEXT: vfcvt.f.xu.v v9, v9 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: vsrl.vi v8, v9, 23 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: li a0, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv1i32: @@ -882,15 +882,15 @@ define @cttz_nxv2i32( %va) { ; CHECK-F-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-F-NEXT: vrsub.vi v9, v8, 0 ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: li a1, 127 ; CHECK-F-NEXT: vand.vv v9, v8, v9 -; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vfcvt.f.xu.v v8, v9 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vsub.vx v8, v8, a1 -; CHECK-F-NEXT: li a1, 32 -; CHECK-F-NEXT: vmerge.vxm v8, v8, a1, v0 +; CHECK-F-NEXT: vfcvt.f.xu.v v9, v9 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: vsrl.vi v8, v9, 23 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: li a0, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv2i32: @@ -953,15 +953,15 @@ define @cttz_nxv4i32( %va) { ; CHECK-F-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; CHECK-F-NEXT: vrsub.vi v10, v8, 0 ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: li a1, 127 ; CHECK-F-NEXT: vand.vv v10, v8, v10 -; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vfcvt.f.xu.v v8, v10 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vsub.vx v8, v8, a1 -; CHECK-F-NEXT: li a1, 32 -; CHECK-F-NEXT: vmerge.vxm v8, v8, a1, v0 +; CHECK-F-NEXT: vfcvt.f.xu.v v10, v10 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: vsrl.vi v8, v10, 23 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: li a0, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv4i32: @@ -1024,15 +1024,15 @@ define @cttz_nxv8i32( %va) { ; CHECK-F-NEXT: vsetvli a0, zero, e32, m4, ta, ma ; CHECK-F-NEXT: vrsub.vi v12, v8, 0 ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: li a1, 127 ; CHECK-F-NEXT: vand.vv v12, v8, v12 -; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vfcvt.f.xu.v v8, v12 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vsub.vx v8, v8, a1 -; CHECK-F-NEXT: li a1, 32 -; CHECK-F-NEXT: vmerge.vxm v8, v8, a1, v0 +; CHECK-F-NEXT: vfcvt.f.xu.v v12, v12 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: vsrl.vi v8, v12, 23 +; 
CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: li a0, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv8i32: @@ -1095,15 +1095,15 @@ define @cttz_nxv16i32( %va) { ; CHECK-F-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-F-NEXT: vrsub.vi v16, v8, 0 ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: li a1, 127 ; CHECK-F-NEXT: vand.vv v16, v8, v16 -; CHECK-F-NEXT: vmseq.vi v0, v8, 0 -; CHECK-F-NEXT: vfcvt.f.xu.v v8, v16 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vsub.vx v8, v8, a1 -; CHECK-F-NEXT: li a1, 32 -; CHECK-F-NEXT: vmerge.vxm v8, v8, a1, v0 +; CHECK-F-NEXT: vfcvt.f.xu.v v16, v16 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vmseq.vi v0, v8, 0 +; CHECK-F-NEXT: vsrl.vi v8, v16, 23 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 +; CHECK-F-NEXT: li a0, 32 +; CHECK-F-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv16i32: @@ -1111,15 +1111,15 @@ define @cttz_nxv16i32( %va) { ; CHECK-D-NEXT: vsetvli a0, zero, e32, m8, ta, ma ; CHECK-D-NEXT: vrsub.vi v16, v8, 0 ; CHECK-D-NEXT: fsrmi a0, 1 -; CHECK-D-NEXT: li a1, 127 ; CHECK-D-NEXT: vand.vv v16, v8, v16 -; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vfcvt.f.xu.v v8, v16 -; CHECK-D-NEXT: vsrl.vi v8, v8, 23 -; CHECK-D-NEXT: vsub.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 32 -; CHECK-D-NEXT: vmerge.vxm v8, v8, a1, v0 +; CHECK-D-NEXT: vfcvt.f.xu.v v16, v16 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 127 +; CHECK-D-NEXT: vmseq.vi v0, v8, 0 +; CHECK-D-NEXT: vsrl.vi v8, v16, 23 +; CHECK-D-NEXT: vsub.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 32 +; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv16i32: @@ -1218,17 +1218,19 @@ define @cttz_nxv1i64( %va) { ; CHECK-F-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-F-NEXT: vrsub.vi v9, v8, 0 ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: li a1, 127 ; CHECK-F-NEXT: vand.vv v9, v8, v9 +; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-F-NEXT: vfncvt.f.xu.w v10, v9 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 ; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-F-NEXT: vfncvt.f.xu.w v8, v9 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vwsubu.vx v9, v8, a1 -; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vsrl.vi v8, v10, 23 +; CHECK-F-NEXT: vwsubu.vx v9, v8, a0 +; CHECK-F-NEXT: li a0, 64 ; CHECK-F-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-F-NEXT: vmerge.vxm v8, v9, a1, v0 -; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vmerge.vxm v8, v9, a0, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv1i64: @@ -1236,16 +1238,16 @@ define @cttz_nxv1i64( %va) { ; CHECK-D-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-D-NEXT: vrsub.vi v9, v8, 0 ; CHECK-D-NEXT: fsrmi a0, 1 -; CHECK-D-NEXT: li a1, 52 ; CHECK-D-NEXT: vand.vv v9, v8, v9 ; CHECK-D-NEXT: vfcvt.f.xu.v v9, v9 -; CHECK-D-NEXT: vsrl.vx v9, v9, a1 -; CHECK-D-NEXT: li a1, 1023 -; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v9, a1 -; CHECK-D-NEXT: li a1, 64 -; CHECK-D-NEXT: vmerge.vxm v8, v8, a1, v0 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v9, v9, a0 +; CHECK-D-NEXT: li a0, 1023 +; CHECK-D-NEXT: vmseq.vi v0, v8, 0 +; CHECK-D-NEXT: vsub.vx v8, v9, a0 +; CHECK-D-NEXT: li a0, 64 +; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv1i64: @@ -1344,17 +1346,19 @@ define @cttz_nxv2i64( %va) { ; CHECK-F-NEXT: vsetvli a0, zero, e64, 
m2, ta, ma ; CHECK-F-NEXT: vrsub.vi v10, v8, 0 ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: li a1, 127 ; CHECK-F-NEXT: vand.vv v10, v8, v10 +; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-F-NEXT: vfncvt.f.xu.w v12, v10 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 ; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-F-NEXT: vfncvt.f.xu.w v8, v10 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vwsubu.vx v10, v8, a1 -; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vsrl.vi v8, v12, 23 +; CHECK-F-NEXT: vwsubu.vx v10, v8, a0 +; CHECK-F-NEXT: li a0, 64 ; CHECK-F-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-F-NEXT: vmerge.vxm v8, v10, a1, v0 -; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vmerge.vxm v8, v10, a0, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv2i64: @@ -1362,16 +1366,16 @@ define @cttz_nxv2i64( %va) { ; CHECK-D-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-D-NEXT: vrsub.vi v10, v8, 0 ; CHECK-D-NEXT: fsrmi a0, 1 -; CHECK-D-NEXT: li a1, 52 ; CHECK-D-NEXT: vand.vv v10, v8, v10 ; CHECK-D-NEXT: vfcvt.f.xu.v v10, v10 -; CHECK-D-NEXT: vsrl.vx v10, v10, a1 -; CHECK-D-NEXT: li a1, 1023 -; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v10, a1 -; CHECK-D-NEXT: li a1, 64 -; CHECK-D-NEXT: vmerge.vxm v8, v8, a1, v0 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v10, v10, a0 +; CHECK-D-NEXT: li a0, 1023 +; CHECK-D-NEXT: vmseq.vi v0, v8, 0 +; CHECK-D-NEXT: vsub.vx v8, v10, a0 +; CHECK-D-NEXT: li a0, 64 +; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv2i64: @@ -1470,17 +1474,19 @@ define @cttz_nxv4i64( %va) { ; CHECK-F-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-F-NEXT: vrsub.vi v12, v8, 0 ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: li a1, 127 ; CHECK-F-NEXT: vand.vv v12, v8, v12 +; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-F-NEXT: vfncvt.f.xu.w v16, v12 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 ; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-F-NEXT: vfncvt.f.xu.w v8, v12 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vwsubu.vx v12, v8, a1 -; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vsrl.vi v8, v16, 23 +; CHECK-F-NEXT: vwsubu.vx v12, v8, a0 +; CHECK-F-NEXT: li a0, 64 ; CHECK-F-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; CHECK-F-NEXT: vmerge.vxm v8, v12, a1, v0 -; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vmerge.vxm v8, v12, a0, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv4i64: @@ -1488,16 +1494,16 @@ define @cttz_nxv4i64( %va) { ; CHECK-D-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-D-NEXT: vrsub.vi v12, v8, 0 ; CHECK-D-NEXT: fsrmi a0, 1 -; CHECK-D-NEXT: li a1, 52 ; CHECK-D-NEXT: vand.vv v12, v8, v12 ; CHECK-D-NEXT: vfcvt.f.xu.v v12, v12 -; CHECK-D-NEXT: vsrl.vx v12, v12, a1 -; CHECK-D-NEXT: li a1, 1023 -; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v12, a1 -; CHECK-D-NEXT: li a1, 64 -; CHECK-D-NEXT: vmerge.vxm v8, v8, a1, v0 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v12, v12, a0 +; CHECK-D-NEXT: li a0, 1023 +; CHECK-D-NEXT: vmseq.vi v0, v8, 0 +; CHECK-D-NEXT: vsub.vx v8, v12, a0 +; CHECK-D-NEXT: li a0, 64 +; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv4i64: @@ -1596,17 +1602,19 @@ define @cttz_nxv8i64( %va) { ; CHECK-F-NEXT: vsetvli a0, zero, e64, 
m8, ta, ma ; CHECK-F-NEXT: vrsub.vi v16, v8, 0 ; CHECK-F-NEXT: fsrmi a0, 1 -; CHECK-F-NEXT: li a1, 127 ; CHECK-F-NEXT: vand.vv v16, v8, v16 +; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma +; CHECK-F-NEXT: vfncvt.f.xu.w v24, v16 +; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-F-NEXT: vmseq.vi v0, v8, 0 ; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-F-NEXT: vfncvt.f.xu.w v8, v16 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: vwsubu.vx v16, v8, a1 -; CHECK-F-NEXT: li a1, 64 +; CHECK-F-NEXT: vsrl.vi v8, v24, 23 +; CHECK-F-NEXT: vwsubu.vx v16, v8, a0 +; CHECK-F-NEXT: li a0, 64 ; CHECK-F-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-F-NEXT: vmerge.vxm v8, v16, a1, v0 -; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vmerge.vxm v8, v16, a0, v0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_nxv8i64: @@ -1614,16 +1622,16 @@ define @cttz_nxv8i64( %va) { ; CHECK-D-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-D-NEXT: vrsub.vi v16, v8, 0 ; CHECK-D-NEXT: fsrmi a0, 1 -; CHECK-D-NEXT: li a1, 52 ; CHECK-D-NEXT: vand.vv v16, v8, v16 ; CHECK-D-NEXT: vfcvt.f.xu.v v16, v16 -; CHECK-D-NEXT: vsrl.vx v16, v16, a1 -; CHECK-D-NEXT: li a1, 1023 -; CHECK-D-NEXT: vmseq.vi v0, v8, 0 -; CHECK-D-NEXT: vsub.vx v8, v16, a1 -; CHECK-D-NEXT: li a1, 64 -; CHECK-D-NEXT: vmerge.vxm v8, v8, a1, v0 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v16, v16, a0 +; CHECK-D-NEXT: li a0, 1023 +; CHECK-D-NEXT: vmseq.vi v0, v8, 0 +; CHECK-D-NEXT: vsub.vx v8, v16, a0 +; CHECK-D-NEXT: li a0, 64 +; CHECK-D-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_nxv8i64: @@ -2378,10 +2386,10 @@ define @cttz_zero_undef_nxv1i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vand.vv v8, v8, v9 ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vsub.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv1i32: @@ -2442,10 +2450,10 @@ define @cttz_zero_undef_nxv2i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vand.vv v8, v8, v9 ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vsub.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv2i32: @@ -2506,10 +2514,10 @@ define @cttz_zero_undef_nxv4i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vand.vv v8, v8, v10 ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vsub.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv4i32: @@ -2570,10 +2578,10 @@ define @cttz_zero_undef_nxv8i32( %va) { ; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vand.vv v8, v8, v12 ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vsub.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv8i32: @@ -2634,10 +2642,10 @@ define @cttz_zero_undef_nxv16i32( %va) { 
; CHECK-F-NEXT: fsrmi a0, 1 ; CHECK-F-NEXT: vand.vv v8, v8, v16 ; CHECK-F-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-F-NEXT: vsrl.vi v8, v8, 23 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vsub.vx v8, v8, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v8, v8, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vsub.vx v8, v8, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv16i32: @@ -2647,10 +2655,10 @@ define @cttz_zero_undef_nxv16i32( %va) { ; CHECK-D-NEXT: fsrmi a0, 1 ; CHECK-D-NEXT: vand.vv v8, v8, v16 ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: vsrl.vi v8, v8, 23 -; CHECK-D-NEXT: li a1, 127 -; CHECK-D-NEXT: vsub.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: vsrl.vi v8, v8, 23 +; CHECK-D-NEXT: li a0, 127 +; CHECK-D-NEXT: vsub.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv16i32: @@ -2751,10 +2759,10 @@ define @cttz_zero_undef_nxv1i64( %va) { ; CHECK-F-NEXT: vand.vv v8, v8, v9 ; CHECK-F-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-F-NEXT: vfncvt.f.xu.w v9, v8 -; CHECK-F-NEXT: vsrl.vi v9, v9, 23 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vwsubu.vx v8, v9, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v9, v9, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vwsubu.vx v8, v9, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv1i64: @@ -2762,13 +2770,13 @@ define @cttz_zero_undef_nxv1i64( %va) { ; CHECK-D-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-D-NEXT: vrsub.vi v9, v8, 0 ; CHECK-D-NEXT: fsrmi a0, 1 -; CHECK-D-NEXT: li a1, 52 ; CHECK-D-NEXT: vand.vv v8, v8, v9 ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: vsrl.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 1023 -; CHECK-D-NEXT: vsub.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 1023 +; CHECK-D-NEXT: vsub.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv1i64: @@ -2869,10 +2877,10 @@ define @cttz_zero_undef_nxv2i64( %va) { ; CHECK-F-NEXT: vand.vv v8, v8, v10 ; CHECK-F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-F-NEXT: vfncvt.f.xu.w v10, v8 -; CHECK-F-NEXT: vsrl.vi v10, v10, 23 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vwsubu.vx v8, v10, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v10, v10, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vwsubu.vx v8, v10, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv2i64: @@ -2880,13 +2888,13 @@ define @cttz_zero_undef_nxv2i64( %va) { ; CHECK-D-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-D-NEXT: vrsub.vi v10, v8, 0 ; CHECK-D-NEXT: fsrmi a0, 1 -; CHECK-D-NEXT: li a1, 52 ; CHECK-D-NEXT: vand.vv v8, v8, v10 ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: vsrl.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 1023 -; CHECK-D-NEXT: vsub.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 1023 +; CHECK-D-NEXT: vsub.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv2i64: @@ -2987,10 +2995,10 @@ define @cttz_zero_undef_nxv4i64( %va) { ; CHECK-F-NEXT: vand.vv v8, v8, v12 ; CHECK-F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-F-NEXT: vfncvt.f.xu.w v12, v8 -; CHECK-F-NEXT: vsrl.vi v12, v12, 23 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vwsubu.vx v8, v12, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v12, v12, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vwsubu.vx v8, v12, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv4i64: @@ -2998,13 +3006,13 @@ define @cttz_zero_undef_nxv4i64( 
%va) { ; CHECK-D-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-D-NEXT: vrsub.vi v12, v8, 0 ; CHECK-D-NEXT: fsrmi a0, 1 -; CHECK-D-NEXT: li a1, 52 ; CHECK-D-NEXT: vand.vv v8, v8, v12 ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: vsrl.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 1023 -; CHECK-D-NEXT: vsub.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 1023 +; CHECK-D-NEXT: vsub.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv4i64: @@ -3105,10 +3113,10 @@ define @cttz_zero_undef_nxv8i64( %va) { ; CHECK-F-NEXT: vand.vv v8, v8, v16 ; CHECK-F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-F-NEXT: vfncvt.f.xu.w v16, v8 -; CHECK-F-NEXT: vsrl.vi v16, v16, 23 -; CHECK-F-NEXT: li a1, 127 -; CHECK-F-NEXT: vwsubu.vx v8, v16, a1 ; CHECK-F-NEXT: fsrm a0 +; CHECK-F-NEXT: vsrl.vi v16, v16, 23 +; CHECK-F-NEXT: li a0, 127 +; CHECK-F-NEXT: vwsubu.vx v8, v16, a0 ; CHECK-F-NEXT: ret ; ; CHECK-D-LABEL: cttz_zero_undef_nxv8i64: @@ -3116,13 +3124,13 @@ define @cttz_zero_undef_nxv8i64( %va) { ; CHECK-D-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-D-NEXT: vrsub.vi v16, v8, 0 ; CHECK-D-NEXT: fsrmi a0, 1 -; CHECK-D-NEXT: li a1, 52 ; CHECK-D-NEXT: vand.vv v8, v8, v16 ; CHECK-D-NEXT: vfcvt.f.xu.v v8, v8 -; CHECK-D-NEXT: vsrl.vx v8, v8, a1 -; CHECK-D-NEXT: li a1, 1023 -; CHECK-D-NEXT: vsub.vx v8, v8, a1 ; CHECK-D-NEXT: fsrm a0 +; CHECK-D-NEXT: li a0, 52 +; CHECK-D-NEXT: vsrl.vx v8, v8, a0 +; CHECK-D-NEXT: li a0, 1023 +; CHECK-D-NEXT: vsub.vx v8, v8, a0 ; CHECK-D-NEXT: ret ; ; CHECK-ZVBB-LABEL: cttz_zero_undef_nxv8i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll index 766717d92a749..60ea1881ed213 100644 --- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll @@ -3708,12 +3708,12 @@ define @vp_cttz_zero_undef_nxv16i32( %va, ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vrsub.vi v16, v8, 0, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: li a1, 127 ; CHECK-NEXT: vand.vv v8, v8, v16, v0.t ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t -; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t -; CHECK-NEXT: vsub.vx v8, v8, a1, v0.t ; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: li a0, 127 +; CHECK-NEXT: vsrl.vi v8, v8, 23, v0.t +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t ; CHECK-NEXT: ret ; ; CHECK-ZVBB-LABEL: vp_cttz_zero_undef_nxv16i32: @@ -3733,10 +3733,10 @@ define @vp_cttz_zero_undef_nxv16i32_unmasked( @vp_cttz_zero_undef_nxv1i64( %va, @vp_cttz_zero_undef_nxv1i64_unmasked( @vp_cttz_zero_undef_nxv2i64( %va, @vp_cttz_zero_undef_nxv2i64_unmasked( @vp_cttz_zero_undef_nxv4i64( %va, @vp_cttz_zero_undef_nxv4i64_unmasked( @vp_cttz_zero_undef_nxv7i64( %va, @vp_cttz_zero_undef_nxv7i64_unmasked( @vp_cttz_zero_undef_nxv8i64( %va, @vp_cttz_zero_undef_nxv8i64_unmasked( @vp_cttz_zero_undef_nxv16i64( %va, ; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vand.vv v8, v8, v16, v0.t ; CHECK-NEXT: vfcvt.f.xu.v v8, v8, v0.t +; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vsrl.vx v8, v8, a2, v0.t ; CHECK-NEXT: vsub.vx v8, v8, a3, v0.t -; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb @@ -4104,12 +4104,12 @@ define @vp_cttz_zero_undef_nxv16i64_unmasked(This Inner Loop Header: Depth=1 ; NO-SINK-NEXT: vl1re32.v v9, (a5) ; NO-SINK-NEXT: sub a6, a6, a3 ; NO-SINK-NEXT: vfadd.vv v9, v9, v8 ; NO-SINK-NEXT: vs1r.v v9, (a5) -; NO-SINK-NEXT: add a5, a5, a1 +; NO-SINK-NEXT: add a5, a5, a2 ; NO-SINK-NEXT: bnez a6, 
.LBB4_3 ; NO-SINK-NEXT: # %bb.4: # %middle.block ; NO-SINK-NEXT: beqz a4, .LBB4_7 ; NO-SINK-NEXT: .LBB4_5: # %for.body.preheader -; NO-SINK-NEXT: slli a1, a2, 2 +; NO-SINK-NEXT: slli a1, a1, 2 ; NO-SINK-NEXT: lui a2, 1 ; NO-SINK-NEXT: add a1, a0, a1 ; NO-SINK-NEXT: add a0, a0, a2 @@ -448,19 +448,19 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; ; SINK-LABEL: sink_splat_fadd_scalable: ; SINK: # %bb.0: # %entry -; SINK-NEXT: csrr a1, vlenb -; SINK-NEXT: srli a3, a1, 2 -; SINK-NEXT: li a2, 1024 -; SINK-NEXT: bgeu a2, a3, .LBB4_2 +; SINK-NEXT: csrr a2, vlenb +; SINK-NEXT: srli a3, a2, 2 +; SINK-NEXT: li a1, 1024 +; SINK-NEXT: bgeu a1, a3, .LBB4_2 ; SINK-NEXT: # %bb.1: -; SINK-NEXT: li a2, 0 +; SINK-NEXT: li a1, 0 ; SINK-NEXT: j .LBB4_5 ; SINK-NEXT: .LBB4_2: # %vector.ph -; SINK-NEXT: addi a2, a3, -1 -; SINK-NEXT: andi a4, a2, 1024 -; SINK-NEXT: xori a2, a4, 1024 +; SINK-NEXT: addi a1, a3, -1 +; SINK-NEXT: andi a4, a1, 1024 +; SINK-NEXT: xori a1, a4, 1024 ; SINK-NEXT: mv a5, a0 -; SINK-NEXT: mv a6, a2 +; SINK-NEXT: mv a6, a1 ; SINK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; SINK-NEXT: .LBB4_3: # %vector.body ; SINK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -468,12 +468,12 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; SINK-NEXT: sub a6, a6, a3 ; SINK-NEXT: vfadd.vf v8, v8, fa0 ; SINK-NEXT: vs1r.v v8, (a5) -; SINK-NEXT: add a5, a5, a1 +; SINK-NEXT: add a5, a5, a2 ; SINK-NEXT: bnez a6, .LBB4_3 ; SINK-NEXT: # %bb.4: # %middle.block ; SINK-NEXT: beqz a4, .LBB4_7 ; SINK-NEXT: .LBB4_5: # %for.body.preheader -; SINK-NEXT: slli a1, a2, 2 +; SINK-NEXT: slli a1, a1, 2 ; SINK-NEXT: lui a2, 1 ; SINK-NEXT: add a1, a0, a1 ; SINK-NEXT: add a0, a0, a2 @@ -489,19 +489,19 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; ; DEFAULT-LABEL: sink_splat_fadd_scalable: ; DEFAULT: # %bb.0: # %entry -; DEFAULT-NEXT: csrr a1, vlenb -; DEFAULT-NEXT: srli a3, a1, 2 -; DEFAULT-NEXT: li a2, 1024 -; DEFAULT-NEXT: bgeu a2, a3, .LBB4_2 +; DEFAULT-NEXT: csrr a2, vlenb +; DEFAULT-NEXT: srli a3, a2, 2 +; DEFAULT-NEXT: li a1, 1024 +; DEFAULT-NEXT: bgeu a1, a3, .LBB4_2 ; DEFAULT-NEXT: # %bb.1: -; DEFAULT-NEXT: li a2, 0 +; DEFAULT-NEXT: li a1, 0 ; DEFAULT-NEXT: j .LBB4_5 ; DEFAULT-NEXT: .LBB4_2: # %vector.ph -; DEFAULT-NEXT: addi a2, a3, -1 -; DEFAULT-NEXT: andi a4, a2, 1024 -; DEFAULT-NEXT: xori a2, a4, 1024 +; DEFAULT-NEXT: addi a1, a3, -1 +; DEFAULT-NEXT: andi a4, a1, 1024 +; DEFAULT-NEXT: xori a1, a4, 1024 ; DEFAULT-NEXT: mv a5, a0 -; DEFAULT-NEXT: mv a6, a2 +; DEFAULT-NEXT: mv a6, a1 ; DEFAULT-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; DEFAULT-NEXT: .LBB4_3: # %vector.body ; DEFAULT-NEXT: # =>This Inner Loop Header: Depth=1 @@ -509,12 +509,12 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; DEFAULT-NEXT: sub a6, a6, a3 ; DEFAULT-NEXT: vfadd.vf v8, v8, fa0 ; DEFAULT-NEXT: vs1r.v v8, (a5) -; DEFAULT-NEXT: add a5, a5, a1 +; DEFAULT-NEXT: add a5, a5, a2 ; DEFAULT-NEXT: bnez a6, .LBB4_3 ; DEFAULT-NEXT: # %bb.4: # %middle.block ; DEFAULT-NEXT: beqz a4, .LBB4_7 ; DEFAULT-NEXT: .LBB4_5: # %for.body.preheader -; DEFAULT-NEXT: slli a1, a2, 2 +; DEFAULT-NEXT: slli a1, a1, 2 ; DEFAULT-NEXT: lui a2, 1 ; DEFAULT-NEXT: add a1, a0, a1 ; DEFAULT-NEXT: add a0, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll index 8c63c2d4be8c1..ec8580e0b6f12 100644 --- a/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/double-round-conv.ll @@ -497,12 +497,12 @@ 
declare @llvm.ceil.nxv1f64() define @ceil_nxv1f64_to_si8( %x) { ; RV32-LABEL: ceil_nxv1f64_to_si8: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI16_0) -; RV32-NEXT: fld fa5, %lo(.LCPI16_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: lui a0, %hi(.LCPI16_0) +; RV32-NEXT: fld fa5, %lo(.LCPI16_0)(a0) ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -518,12 +518,12 @@ define @ceil_nxv1f64_to_si8( %x) { ; ; RV64-LABEL: ceil_nxv1f64_to_si8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI16_0) -; RV64-NEXT: fld fa5, %lo(.LCPI16_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: lui a0, %hi(.LCPI16_0) +; RV64-NEXT: fld fa5, %lo(.LCPI16_0)(a0) ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -544,12 +544,12 @@ define @ceil_nxv1f64_to_si8( %x) { define @ceil_nxv1f64_to_ui8( %x) { ; RV32-LABEL: ceil_nxv1f64_to_ui8: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI17_0) -; RV32-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: lui a0, %hi(.LCPI17_0) +; RV32-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -565,12 +565,12 @@ define @ceil_nxv1f64_to_ui8( %x) { ; ; RV64-LABEL: ceil_nxv1f64_to_ui8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI17_0) -; RV64-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: lui a0, %hi(.LCPI17_0) +; RV64-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -591,12 +591,12 @@ define @ceil_nxv1f64_to_ui8( %x) { define @ceil_nxv1f64_to_si16( %x) { ; RV32-LABEL: ceil_nxv1f64_to_si16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI18_0) -; RV32-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: lui a0, %hi(.LCPI18_0) +; RV32-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -610,12 +610,12 @@ define @ceil_nxv1f64_to_si16( %x) { ; ; RV64-LABEL: ceil_nxv1f64_to_si16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI18_0) -; RV64-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: lui a0, %hi(.LCPI18_0) +; RV64-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -634,12 +634,12 @@ define @ceil_nxv1f64_to_si16( %x) { define @ceil_nxv1f64_to_ui16( %x) { ; RV32-LABEL: ceil_nxv1f64_to_ui16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI19_0) -; RV32-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, 
m1, ta, ma ; RV32-NEXT: vfabs.v v9, v8 -; RV32-NEXT: vmflt.vf v0, v9, fa5 +; RV32-NEXT: lui a0, %hi(.LCPI19_0) +; RV32-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -653,12 +653,12 @@ define @ceil_nxv1f64_to_ui16( %x) { ; ; RV64-LABEL: ceil_nxv1f64_to_ui16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI19_0) -; RV64-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; RV64-NEXT: vfabs.v v9, v8 -; RV64-NEXT: vmflt.vf v0, v9, fa5 +; RV64-NEXT: lui a0, %hi(.LCPI19_0) +; RV64-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -771,12 +771,12 @@ declare @llvm.ceil.nxv4f64() define @ceil_nxv4f64_to_si8( %x) { ; RV32-LABEL: ceil_nxv4f64_to_si8: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI24_0) -; RV32-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: lui a0, %hi(.LCPI24_0) +; RV32-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -792,12 +792,12 @@ define @ceil_nxv4f64_to_si8( %x) { ; ; RV64-LABEL: ceil_nxv4f64_to_si8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI24_0) -; RV64-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: lui a0, %hi(.LCPI24_0) +; RV64-NEXT: fld fa5, %lo(.LCPI24_0)(a0) ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -818,12 +818,12 @@ define @ceil_nxv4f64_to_si8( %x) { define @ceil_nxv4f64_to_ui8( %x) { ; RV32-LABEL: ceil_nxv4f64_to_ui8: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI25_0) -; RV32-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: lui a0, %hi(.LCPI25_0) +; RV32-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -839,12 +839,12 @@ define @ceil_nxv4f64_to_ui8( %x) { ; ; RV64-LABEL: ceil_nxv4f64_to_ui8: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI25_0) -; RV64-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: lui a0, %hi(.LCPI25_0) +; RV64-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -865,12 +865,12 @@ define @ceil_nxv4f64_to_ui8( %x) { define @ceil_nxv4f64_to_si16( %x) { ; RV32-LABEL: ceil_nxv4f64_to_si16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI26_0) -; RV32-NEXT: fld fa5, %lo(.LCPI26_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: lui a0, %hi(.LCPI26_0) +; RV32-NEXT: fld fa5, %lo(.LCPI26_0)(a0) ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v12, fa5 ; 
RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -884,12 +884,12 @@ define @ceil_nxv4f64_to_si16( %x) { ; ; RV64-LABEL: ceil_nxv4f64_to_si16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI26_0) -; RV64-NEXT: fld fa5, %lo(.LCPI26_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: lui a0, %hi(.LCPI26_0) +; RV64-NEXT: fld fa5, %lo(.LCPI26_0)(a0) ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -908,12 +908,12 @@ define @ceil_nxv4f64_to_si16( %x) { define @ceil_nxv4f64_to_ui16( %x) { ; RV32-LABEL: ceil_nxv4f64_to_ui16: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI27_0) -; RV32-NEXT: fld fa5, %lo(.LCPI27_0)(a0) ; RV32-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV32-NEXT: vfabs.v v12, v8 -; RV32-NEXT: vmflt.vf v0, v12, fa5 +; RV32-NEXT: lui a0, %hi(.LCPI27_0) +; RV32-NEXT: fld fa5, %lo(.LCPI27_0)(a0) ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v12, fa5 ; RV32-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -927,12 +927,12 @@ define @ceil_nxv4f64_to_ui16( %x) { ; ; RV64-LABEL: ceil_nxv4f64_to_ui16: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI27_0) -; RV64-NEXT: fld fa5, %lo(.LCPI27_0)(a0) ; RV64-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; RV64-NEXT: vfabs.v v12, v8 -; RV64-NEXT: vmflt.vf v0, v12, fa5 +; RV64-NEXT: lui a0, %hi(.LCPI27_0) +; RV64-NEXT: fld fa5, %lo(.LCPI27_0)(a0) ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v12, fa5 ; RV64-NEXT: vfcvt.x.f.v v12, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/expandload.ll b/llvm/test/CodeGen/RISCV/rvv/expandload.ll index a35cf14203f78..51c70a32ccac8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/expandload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/expandload.ll @@ -136,12 +136,12 @@ define <32 x i8> @test_expandload_v32i8(ptr %base, <32 x i1> %mask, <32 x i8> %p ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: viota.m v10, v0 ; CHECK-NEXT: vcpop.m a2, v0 ; CHECK-NEXT: vsetvli zero, a2, e8, m2, ta, ma -; CHECK-NEXT: vle8.v v10, (a0) +; CHECK-NEXT: vle8.v v12, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, mu -; CHECK-NEXT: viota.m v12, v0 -; CHECK-NEXT: vrgather.vv v8, v10, v12, v0.t +; CHECK-NEXT: vrgather.vv v8, v12, v10, v0.t ; CHECK-NEXT: ret %res = call <32 x i8> @llvm.masked.expandload.v32i8(ptr align 1 %base, <32 x i1> %mask, <32 x i8> %passthru) ret <32 x i8> %res @@ -163,12 +163,12 @@ define <64 x i8> @test_expandload_v64i8(ptr %base, <64 x i1> %mask, <64 x i8> %p ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: viota.m v12, v0 ; CHECK-NEXT: vcpop.m a2, v0 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; CHECK-NEXT: vle8.v v12, (a0) +; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu -; CHECK-NEXT: viota.m v16, v0 -; CHECK-NEXT: vrgather.vv v8, v12, v16, v0.t +; CHECK-NEXT: vrgather.vv v8, v16, v12, v0.t ; CHECK-NEXT: ret %res = call <64 x i8> @llvm.masked.expandload.v64i8(ptr align 1 %base, <64 x i1> %mask, <64 x i8> %passthru) ret <64 x i8> %res @@ -190,12 +190,12 @@ define <128 x i8> @test_expandload_v128i8(ptr %base, <128 x i1> %mask, <128 x i8 ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; 
CHECK-NEXT: viota.m v16, v0 ; CHECK-NEXT: vcpop.m a2, v0 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v16, (a0) +; CHECK-NEXT: vle8.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, mu -; CHECK-NEXT: viota.m v24, v0 -; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-NEXT: vrgather.vv v8, v24, v16, v0.t ; CHECK-NEXT: ret %res = call <128 x i8> @llvm.masked.expandload.v128i8(ptr align 1 %base, <128 x i1> %mask, <128 x i8> %passthru) ret <128 x i8> %res @@ -218,106 +218,71 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8 ; CHECK-RV32-NEXT: addi sp, sp, -16 ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV32-NEXT: csrr a2, vlenb -; CHECK-RV32-NEXT: slli a2, a2, 5 +; CHECK-RV32-NEXT: slli a2, a2, 4 ; CHECK-RV32-NEXT: sub sp, sp, a2 -; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-RV32-NEXT: csrr a2, vlenb -; CHECK-RV32-NEXT: li a3, 24 -; CHECK-RV32-NEXT: mul a2, a2, a3 +; CHECK-RV32-NEXT: slli a2, a2, 3 ; CHECK-RV32-NEXT: add a2, sp, a2 ; CHECK-RV32-NEXT: addi a2, a2, 16 ; CHECK-RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vmv1r.v v7, v8 ; CHECK-RV32-NEXT: li a2, 128 -; CHECK-RV32-NEXT: vslidedown.vi v9, v0, 1 +; CHECK-RV32-NEXT: vslidedown.vi v6, v0, 1 ; CHECK-RV32-NEXT: li a3, 32 ; CHECK-RV32-NEXT: vmv.x.s a4, v0 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-RV32-NEXT: viota.m v16, v0 +; CHECK-RV32-NEXT: addi a5, sp, 16 +; CHECK-RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vcpop.m a5, v0 +; CHECK-RV32-NEXT: vsetvli zero, a5, e8, m8, ta, ma +; CHECK-RV32-NEXT: vle8.v v24, (a0) +; CHECK-RV32-NEXT: csrr a5, vlenb +; CHECK-RV32-NEXT: slli a5, a5, 3 +; CHECK-RV32-NEXT: add a5, sp, a5 +; CHECK-RV32-NEXT: addi a5, a5, 16 +; CHECK-RV32-NEXT: vl8r.v v16, (a5) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: addi a5, sp, 16 +; CHECK-RV32-NEXT: vl8r.v v8, (a5) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, ta, mu +; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t +; CHECK-RV32-NEXT: csrr a5, vlenb +; CHECK-RV32-NEXT: slli a5, a5, 3 +; CHECK-RV32-NEXT: add a5, sp, a5 +; CHECK-RV32-NEXT: addi a5, a5, 16 +; CHECK-RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vle8.v v16, (a1) -; CHECK-RV32-NEXT: csrr a1, vlenb -; CHECK-RV32-NEXT: slli a1, a1, 3 -; CHECK-RV32-NEXT: add a1, sp, a1 -; CHECK-RV32-NEXT: addi a1, a1, 16 -; CHECK-RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsrl.vx v10, v9, a3 +; CHECK-RV32-NEXT: vsrl.vx v10, v6, a3 ; CHECK-RV32-NEXT: vsrl.vx v11, v0, a3 -; CHECK-RV32-NEXT: vmv.x.s a1, v9 -; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-RV32-NEXT: vcpop.m a3, v0 -; CHECK-RV32-NEXT: cpop a4, a4 -; CHECK-RV32-NEXT: vsetvli zero, a2, e64, m1, ta, ma -; CHECK-RV32-NEXT: vmv.x.s a5, v10 -; CHECK-RV32-NEXT: vmv.x.s a6, v11 -; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-RV32-NEXT: vle8.v v8, (a0) -; CHECK-RV32-NEXT: csrr a3, vlenb -; CHECK-RV32-NEXT: slli a3, a3, 4 -; CHECK-RV32-NEXT: add a3, sp, a3 -; CHECK-RV32-NEXT: addi a3, a3, 16 -; CHECK-RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill 
+; CHECK-RV32-NEXT: vmv.x.s a1, v6 +; CHECK-RV32-NEXT: cpop a3, a4 +; CHECK-RV32-NEXT: vmv.x.s a4, v10 +; CHECK-RV32-NEXT: vmv.x.s a5, v11 ; CHECK-RV32-NEXT: cpop a1, a1 -; CHECK-RV32-NEXT: cpop a3, a6 ; CHECK-RV32-NEXT: cpop a5, a5 -; CHECK-RV32-NEXT: add a3, a4, a3 -; CHECK-RV32-NEXT: add a1, a1, a5 +; CHECK-RV32-NEXT: cpop a4, a4 +; CHECK-RV32-NEXT: add a3, a3, a5 +; CHECK-RV32-NEXT: add a1, a1, a4 ; CHECK-RV32-NEXT: add a1, a3, a1 ; CHECK-RV32-NEXT: add a0, a0, a1 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-RV32-NEXT: vcpop.m a1, v7 ; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m8, ta, ma -; CHECK-RV32-NEXT: vle8.v v8, (a0) -; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vle8.v v24, (a0) ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m8, ta, mu -; CHECK-RV32-NEXT: viota.m v24, v0 -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: li a1, 24 -; CHECK-RV32-NEXT: mul a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 4 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: li a1, 24 -; CHECK-RV32-NEXT: mul a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: viota.m v16, v7 -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 4 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: viota.m v8, v7 ; CHECK-RV32-NEXT: vmv1r.v v0, v7 -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 3 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 4 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: li a1, 24 -; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: slli a0, a0, 3 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 ; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 5 +; CHECK-RV32-NEXT: slli a0, a0, 4 ; CHECK-RV32-NEXT: add sp, sp, a0 ; CHECK-RV32-NEXT: .cfi_def_cfa sp, 16 ; CHECK-RV32-NEXT: addi sp, sp, 16 @@ -329,38 +294,50 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8 ; CHECK-RV64-NEXT: addi sp, sp, -16 ; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV64-NEXT: csrr a2, vlenb -; CHECK-RV64-NEXT: slli a2, a2, 5 -; CHECK-RV64-NEXT: sub sp, sp, a2 -; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-RV64-NEXT: csrr a2, vlenb ; CHECK-RV64-NEXT: li a3, 24 ; CHECK-RV64-NEXT: mul a2, a2, a3 +; CHECK-RV64-NEXT: sub sp, sp, a2 +; CHECK-RV64-NEXT: .cfi_escape 
0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-RV64-NEXT: csrr a2, vlenb +; CHECK-RV64-NEXT: slli a2, a2, 4 ; CHECK-RV64-NEXT: add a2, sp, a2 ; CHECK-RV64-NEXT: addi a2, a2, 16 ; CHECK-RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV64-NEXT: vmv1r.v v7, v8 ; CHECK-RV64-NEXT: li a2, 128 -; CHECK-RV64-NEXT: vslidedown.vi v9, v0, 1 +; CHECK-RV64-NEXT: vslidedown.vi v6, v0, 1 ; CHECK-RV64-NEXT: vmv.x.s a3, v0 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-RV64-NEXT: vle8.v v16, (a1) -; CHECK-RV64-NEXT: csrr a1, vlenb -; CHECK-RV64-NEXT: slli a1, a1, 3 -; CHECK-RV64-NEXT: add a1, sp, a1 -; CHECK-RV64-NEXT: addi a1, a1, 16 -; CHECK-RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m1, ta, ma -; CHECK-RV64-NEXT: vmv.x.s a1, v9 -; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-RV64-NEXT: viota.m v16, v0 +; CHECK-RV64-NEXT: csrr a4, vlenb +; CHECK-RV64-NEXT: slli a4, a4, 3 +; CHECK-RV64-NEXT: add a4, sp, a4 +; CHECK-RV64-NEXT: addi a4, a4, 16 +; CHECK-RV64-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vcpop.m a4, v0 ; CHECK-RV64-NEXT: vsetvli zero, a4, e8, m8, ta, ma -; CHECK-RV64-NEXT: vle8.v v8, (a0) +; CHECK-RV64-NEXT: vle8.v v16, (a0) +; CHECK-RV64-NEXT: csrr a4, vlenb +; CHECK-RV64-NEXT: slli a4, a4, 3 +; CHECK-RV64-NEXT: add a4, sp, a4 +; CHECK-RV64-NEXT: addi a4, a4, 16 +; CHECK-RV64-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a4, vlenb +; CHECK-RV64-NEXT: slli a4, a4, 4 +; CHECK-RV64-NEXT: add a4, sp, a4 +; CHECK-RV64-NEXT: addi a4, a4, 16 +; CHECK-RV64-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, ta, mu +; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t ; CHECK-RV64-NEXT: csrr a4, vlenb ; CHECK-RV64-NEXT: slli a4, a4, 4 ; CHECK-RV64-NEXT: add a4, sp, a4 ; CHECK-RV64-NEXT: addi a4, a4, 16 ; CHECK-RV64-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vle8.v v16, (a1) +; CHECK-RV64-NEXT: vsetvli zero, a2, e64, m1, ta, ma +; CHECK-RV64-NEXT: vmv.x.s a1, v6 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-RV64-NEXT: vcpop.m a4, v7 ; CHECK-RV64-NEXT: cpop a3, a3 @@ -372,53 +349,29 @@ define <256 x i8> @test_expandload_v256i8(ptr %base, <256 x i1> %mask, <256 x i8 ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m8, ta, mu -; CHECK-RV64-NEXT: viota.m v24, v0 -; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: li a1, 24 -; CHECK-RV64-NEXT: mul a0, a0, a1 -; CHECK-RV64-NEXT: add a0, sp, a0 -; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: viota.m v24, v7 ; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: slli a0, a0, 4 -; CHECK-RV64-NEXT: add a0, sp, a0 -; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t -; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: li a1, 24 -; CHECK-RV64-NEXT: mul a0, a0, a1 -; CHECK-RV64-NEXT: add a0, sp, a0 -; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: viota.m v16, v7 -; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: slli a0, a0, 4 +; CHECK-RV64-NEXT: slli a0, a0, 3 ; 
CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vmv1r.v v0, v7 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 3 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: addi a0, sp, 16 +; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vrgather.vv v16, v8, v24, v0.t ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 4 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: li a1, 24 ; CHECK-RV64-NEXT: mul a0, a0, a1 -; CHECK-RV64-NEXT: add a0, sp, a0 -; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: slli a0, a0, 5 ; CHECK-RV64-NEXT: add sp, sp, a0 ; CHECK-RV64-NEXT: .cfi_def_cfa sp, 16 ; CHECK-RV64-NEXT: addi sp, sp, 16 @@ -608,13 +561,13 @@ define <32 x i16> @test_expandload_v32i16(ptr %base, <32 x i1> %mask, <32 x i16> ; CHECK-LABEL: test_expandload_v32i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: viota.m v12, v0 ; CHECK-NEXT: vcpop.m a2, v0 ; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vle16.v v12, (a0) +; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, mu -; CHECK-NEXT: viota.m v16, v0 -; CHECK-NEXT: vrgather.vv v8, v12, v16, v0.t +; CHECK-NEXT: vrgather.vv v8, v16, v12, v0.t ; CHECK-NEXT: ret %res = call <32 x i16> @llvm.masked.expandload.v32i16(ptr align 2 %base, <32 x i1> %mask, <32 x i16> %passthru) ret <32 x i16> %res @@ -635,13 +588,13 @@ define <64 x i16> @test_expandload_v64i16(ptr %base, <64 x i1> %mask, <64 x i16> ; CHECK-LABEL: test_expandload_v64i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: viota.m v16, v0 ; CHECK-NEXT: vcpop.m a2, v0 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vle16.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-NEXT: viota.m v24, v0 -; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-NEXT: vrgather.vv v8, v24, v16, v0.t ; CHECK-NEXT: ret %res = call <64 x i16> @llvm.masked.expandload.v64i16(ptr align 2 %base, <64 x i1> %mask, <64 x i16> %passthru) ret <64 x i16> %res @@ -664,76 +617,66 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x ; CHECK-RV32-NEXT: addi sp, sp, -16 ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV32-NEXT: csrr a1, vlenb -; CHECK-RV32-NEXT: slli a1, a1, 5 -; CHECK-RV32-NEXT: sub sp, sp, a1 -; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-RV32-NEXT: csrr a1, vlenb ; CHECK-RV32-NEXT: li a2, 24 ; CHECK-RV32-NEXT: mul a1, a1, a2 +; CHECK-RV32-NEXT: sub sp, sp, a1 +; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 
0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; CHECK-RV32-NEXT: csrr a1, vlenb +; CHECK-RV32-NEXT: slli a1, a1, 4 ; CHECK-RV32-NEXT: add a1, sp, a1 ; CHECK-RV32-NEXT: addi a1, a1, 16 ; CHECK-RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: li a1, 64 +; CHECK-RV32-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-RV32-NEXT: viota.m v16, v0 +; CHECK-RV32-NEXT: vcpop.m a2, v0 +; CHECK-RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; CHECK-RV32-NEXT: vle16.v v24, (a0) +; CHECK-RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-RV32-NEXT: vrgather.vv v8, v24, v16, v0.t +; CHECK-RV32-NEXT: csrr a2, vlenb +; CHECK-RV32-NEXT: slli a2, a2, 3 +; CHECK-RV32-NEXT: add a2, sp, a2 +; CHECK-RV32-NEXT: addi a2, a2, 16 +; CHECK-RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-RV32-NEXT: vslidedown.vi v7, v0, 8 +; CHECK-RV32-NEXT: vslidedown.vi v24, v0, 8 ; CHECK-RV32-NEXT: li a2, 32 ; CHECK-RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-RV32-NEXT: vmv.x.s a3, v0 ; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-RV32-NEXT: vcpop.m a4, v0 +; CHECK-RV32-NEXT: vcpop.m a4, v24 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV32-NEXT: vsrl.vx v25, v0, a2 -; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m4, ta, ma -; CHECK-RV32-NEXT: vcpop.m a2, v7 -; CHECK-RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma -; CHECK-RV32-NEXT: vle16.v v16, (a0) -; CHECK-RV32-NEXT: csrr a5, vlenb -; CHECK-RV32-NEXT: slli a5, a5, 4 -; CHECK-RV32-NEXT: add a5, sp, a5 -; CHECK-RV32-NEXT: addi a5, a5, 16 -; CHECK-RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m1, ta, ma -; CHECK-RV32-NEXT: vmv.x.s a4, v25 -; CHECK-RV32-NEXT: cpop a4, a4 +; CHECK-RV32-NEXT: vsrl.vx v8, v0, a2 +; CHECK-RV32-NEXT: cpop a2, a3 +; CHECK-RV32-NEXT: vmv.x.s a3, v8 ; CHECK-RV32-NEXT: cpop a3, a3 -; CHECK-RV32-NEXT: add a3, a3, a4 -; CHECK-RV32-NEXT: slli a3, a3, 1 -; CHECK-RV32-NEXT: add a0, a0, a3 -; CHECK-RV32-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-RV32-NEXT: vle16.v v16, (a0) -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 3 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-RV32-NEXT: viota.m v16, v0 -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 4 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: vrgather.vv v8, v24, v16, v0.t +; CHECK-RV32-NEXT: add a2, a2, a3 +; CHECK-RV32-NEXT: slli a2, a2, 1 +; CHECK-RV32-NEXT: add a0, a0, a2 +; CHECK-RV32-NEXT: vsetvli zero, a4, e16, m8, ta, ma +; CHECK-RV32-NEXT: vle16.v v8, (a0) ; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: viota.m v8, v7 -; CHECK-RV32-NEXT: vmv1r.v v0, v7 +; CHECK-RV32-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-RV32-NEXT: viota.m v8, v24 +; CHECK-RV32-NEXT: vmv1r.v v0, v24 ; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: li a1, 24 -; CHECK-RV32-NEXT: mul a0, a0, a1 +; CHECK-RV32-NEXT: slli a0, a0, 4 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 ; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-RV32-NEXT: addi a0, sp, 16 +; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; 
CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 3 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t -; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 5 +; CHECK-RV32-NEXT: li a1, 24 +; CHECK-RV32-NEXT: mul a0, a0, a1 ; CHECK-RV32-NEXT: add sp, sp, a0 ; CHECK-RV32-NEXT: .cfi_def_cfa sp, 16 ; CHECK-RV32-NEXT: addi sp, sp, 16 @@ -749,50 +692,58 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x ; CHECK-RV64-NEXT: sub sp, sp, a1 ; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; CHECK-RV64-NEXT: csrr a1, vlenb -; CHECK-RV64-NEXT: slli a1, a1, 3 +; CHECK-RV64-NEXT: slli a1, a1, 4 ; CHECK-RV64-NEXT: add a1, sp, a1 ; CHECK-RV64-NEXT: addi a1, a1, 16 ; CHECK-RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: li a1, 64 -; CHECK-RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-RV64-NEXT: vslidedown.vi v7, v0, 8 -; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-RV64-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-RV64-NEXT: viota.m v16, v0 +; CHECK-RV64-NEXT: csrr a2, vlenb +; CHECK-RV64-NEXT: li a3, 24 +; CHECK-RV64-NEXT: mul a2, a2, a3 +; CHECK-RV64-NEXT: add a2, sp, a2 +; CHECK-RV64-NEXT: addi a2, a2, 16 +; CHECK-RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vcpop.m a2, v0 -; CHECK-RV64-NEXT: vcpop.m a3, v7 ; CHECK-RV64-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; CHECK-RV64-NEXT: vle16.v v24, (a0) -; CHECK-RV64-NEXT: csrr a4, vlenb -; CHECK-RV64-NEXT: slli a4, a4, 4 -; CHECK-RV64-NEXT: add a4, sp, a4 -; CHECK-RV64-NEXT: addi a4, a4, 16 -; CHECK-RV64-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: csrr a3, vlenb +; CHECK-RV64-NEXT: li a4, 24 +; CHECK-RV64-NEXT: mul a3, a3, a4 +; CHECK-RV64-NEXT: add a3, sp, a3 +; CHECK-RV64-NEXT: addi a3, a3, 16 +; CHECK-RV64-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vsetvli zero, a1, e16, m8, ta, mu +; CHECK-RV64-NEXT: vrgather.vv v8, v24, v16, v0.t +; CHECK-RV64-NEXT: addi a3, sp, 16 +; CHECK-RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-RV64-NEXT: vslidedown.vi v0, v0, 8 +; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-RV64-NEXT: vcpop.m a3, v0 ; CHECK-RV64-NEXT: slli a2, a2, 1 ; CHECK-RV64-NEXT: add a0, a0, a2 ; CHECK-RV64-NEXT: vsetvli zero, a3, e16, m8, ta, ma -; CHECK-RV64-NEXT: vle16.v v24, (a0) +; CHECK-RV64-NEXT: vle16.v v16, (a0) ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: li a2, 24 ; CHECK-RV64-NEXT: mul a0, a0, a2 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetvli zero, a1, e16, m8, ta, mu -; CHECK-RV64-NEXT: viota.m v24, v0 +; CHECK-RV64-NEXT: viota.m v16, v0 ; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: slli a0, a0, 4 +; CHECK-RV64-NEXT: slli a0, a0, 3 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: vrgather.vv v8, v16, 
v24, v0.t -; CHECK-RV64-NEXT: addi a0, sp, 16 -; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: viota.m v16, v7 +; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 4 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: vmv1r.v v0, v7 +; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: li a1, 24 ; CHECK-RV64-NEXT: mul a0, a0, a1 @@ -803,11 +754,6 @@ define <128 x i16> @test_expandload_v128i16(ptr %base, <128 x i1> %mask, <128 x ; CHECK-RV64-NEXT: slli a0, a0, 3 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: slli a0, a0, 4 -; CHECK-RV64-NEXT: add a0, sp, a0 -; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 @@ -990,13 +936,13 @@ define <32 x i32> @test_expandload_v32i32(ptr %base, <32 x i1> %mask, <32 x i32> ; CHECK-LABEL: test_expandload_v32i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: viota.m v16, v0 ; CHECK-NEXT: vcpop.m a2, v0 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v16, (a0) +; CHECK-NEXT: vle32.v v24, (a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: viota.m v24, v0 -; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t +; CHECK-NEXT: vrgather.vv v8, v24, v16, v0.t ; CHECK-NEXT: ret %res = call <32 x i32> @llvm.masked.expandload.v32i32(ptr align 4 %base, <32 x i1> %mask, <32 x i32> %passthru) ret <32 x i32> %res @@ -1023,50 +969,58 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32> ; CHECK-RV32-NEXT: sub sp, sp, a1 ; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb ; CHECK-RV32-NEXT: csrr a1, vlenb -; CHECK-RV32-NEXT: slli a1, a1, 3 +; CHECK-RV32-NEXT: slli a1, a1, 4 ; CHECK-RV32-NEXT: add a1, sp, a1 ; CHECK-RV32-NEXT: addi a1, a1, 16 ; CHECK-RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: li a1, 32 -; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-RV32-NEXT: vslidedown.vi v7, v0, 4 -; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-RV32-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-RV32-NEXT: viota.m v16, v0 +; CHECK-RV32-NEXT: csrr a2, vlenb +; CHECK-RV32-NEXT: li a3, 24 +; CHECK-RV32-NEXT: mul a2, a2, a3 +; CHECK-RV32-NEXT: add a2, sp, a2 +; CHECK-RV32-NEXT: addi a2, a2, 16 +; CHECK-RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vcpop.m a2, v0 -; CHECK-RV32-NEXT: vcpop.m a3, v7 ; CHECK-RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-RV32-NEXT: vle32.v v24, (a0) -; CHECK-RV32-NEXT: csrr a4, vlenb -; CHECK-RV32-NEXT: slli a4, a4, 4 -; CHECK-RV32-NEXT: add a4, sp, a4 -; CHECK-RV32-NEXT: addi a4, a4, 16 -; CHECK-RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: csrr a3, vlenb +; CHECK-RV32-NEXT: li a4, 24 +; CHECK-RV32-NEXT: mul a3, a3, a4 +; CHECK-RV32-NEXT: add a3, sp, a3 +; CHECK-RV32-NEXT: addi a3, a3, 16 +; CHECK-RV32-NEXT: vl8r.v v16, (a3) # 
Unknown-size Folded Reload +; CHECK-RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-RV32-NEXT: vrgather.vv v8, v24, v16, v0.t +; CHECK-RV32-NEXT: addi a3, sp, 16 +; CHECK-RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-RV32-NEXT: vslidedown.vi v0, v0, 4 +; CHECK-RV32-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-RV32-NEXT: vcpop.m a3, v0 ; CHECK-RV32-NEXT: slli a2, a2, 2 ; CHECK-RV32-NEXT: add a0, a0, a2 ; CHECK-RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-RV32-NEXT: vle32.v v24, (a0) +; CHECK-RV32-NEXT: vle32.v v16, (a0) ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: li a2, 24 ; CHECK-RV32-NEXT: mul a0, a0, a2 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-RV32-NEXT: viota.m v24, v0 +; CHECK-RV32-NEXT: viota.m v16, v0 ; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 4 +; CHECK-RV32-NEXT: slli a0, a0, 3 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t -; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: viota.m v16, v7 +; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 4 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: vmv1r.v v0, v7 +; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: li a1, 24 ; CHECK-RV32-NEXT: mul a0, a0, a1 @@ -1077,11 +1031,6 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32> ; CHECK-RV32-NEXT: slli a0, a0, 3 ; CHECK-RV32-NEXT: add a0, sp, a0 ; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 4 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 ; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV32-NEXT: addi a0, sp, 16 @@ -1108,55 +1057,68 @@ define <64 x i32> @test_expandload_v64i32(ptr %base, <64 x i1> %mask, <64 x i32> ; CHECK-RV64-NEXT: addi a1, a1, 16 ; CHECK-RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: li a1, 32 +; CHECK-RV64-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-RV64-NEXT: viota.m v16, v0 +; CHECK-RV64-NEXT: csrr a2, vlenb +; CHECK-RV64-NEXT: li a3, 24 +; CHECK-RV64-NEXT: mul a2, a2, a3 +; CHECK-RV64-NEXT: add a2, sp, a2 +; CHECK-RV64-NEXT: addi a2, a2, 16 +; CHECK-RV64-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vcpop.m a2, v0 +; CHECK-RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; CHECK-RV64-NEXT: vle32.v v24, (a0) +; CHECK-RV64-NEXT: csrr a2, vlenb +; CHECK-RV64-NEXT: li a3, 24 +; CHECK-RV64-NEXT: mul a2, a2, a3 +; CHECK-RV64-NEXT: add a2, sp, a2 +; CHECK-RV64-NEXT: addi a2, a2, 16 +; CHECK-RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu +; CHECK-RV64-NEXT: vrgather.vv v8, v24, v16, v0.t +; CHECK-RV64-NEXT: addi a2, sp, 16 
+; CHECK-RV64-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-RV64-NEXT: vslidedown.vi v7, v0, 4 +; CHECK-RV64-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-RV64-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-RV64-NEXT: vmv.x.s a2, v0 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-RV64-NEXT: vcpop.m a3, v0 -; CHECK-RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-RV64-NEXT: vle32.v v24, (a0) -; CHECK-RV64-NEXT: csrr a3, vlenb -; CHECK-RV64-NEXT: li a4, 24 -; CHECK-RV64-NEXT: mul a3, a3, a4 -; CHECK-RV64-NEXT: add a3, sp, a3 -; CHECK-RV64-NEXT: addi a3, a3, 16 -; CHECK-RV64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-RV64-NEXT: vcpop.m a3, v7 +; CHECK-RV64-NEXT: vcpop.m a3, v24 ; CHECK-RV64-NEXT: cpopw a2, a2 ; CHECK-RV64-NEXT: slli a2, a2, 2 ; CHECK-RV64-NEXT: add a0, a0, a2 ; CHECK-RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-RV64-NEXT: vle32.v v16, (a0) ; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: slli a0, a0, 3 +; CHECK-RV64-NEXT: li a2, 24 +; CHECK-RV64-NEXT: mul a0, a0, a2 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-RV64-NEXT: viota.m v24, v0 +; CHECK-RV64-NEXT: viota.m v16, v24 ; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: li a1, 24 -; CHECK-RV64-NEXT: mul a0, a0, a1 +; CHECK-RV64-NEXT: slli a0, a0, 3 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t -; CHECK-RV64-NEXT: addi a0, sp, 16 -; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: viota.m v8, v7 -; CHECK-RV64-NEXT: vmv1r.v v0, v7 +; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vmv1r.v v0, v24 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 4 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: slli a0, a0, 3 +; CHECK-RV64-NEXT: li a1, 24 +; CHECK-RV64-NEXT: mul a0, a0, a1 ; CHECK-RV64-NEXT: add a0, sp, a0 ; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-RV64-NEXT: csrr a0, vlenb +; CHECK-RV64-NEXT: slli a0, a0, 3 +; CHECK-RV64-NEXT: add a0, sp, a0 +; CHECK-RV64-NEXT: addi a0, a0, 16 +; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: vrgather.vv v16, v24, v8, v0.t ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -1329,33 +1291,34 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV32-NEXT: addi sp, sp, -16 ; CHECK-RV32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV32-NEXT: csrr a1, vlenb -; CHECK-RV32-NEXT: slli a1, a1, 5 +; CHECK-RV32-NEXT: li a2, 24 +; CHECK-RV32-NEXT: mul a1, a1, a2 ; CHECK-RV32-NEXT: sub sp, sp, a1 -; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-RV32-NEXT: csrr a1, vlenb ; CHECK-RV32-NEXT: slli a1, a1, 4 ; 
CHECK-RV32-NEXT: add a1, sp, a1 ; CHECK-RV32-NEXT: addi a1, a1, 16 ; CHECK-RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-RV32-NEXT: vcpop.m a1, v0 -; CHECK-RV32-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-RV32-NEXT: viota.m v16, v0 +; CHECK-RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-RV32-NEXT: vle64.v v24, (a0) -; CHECK-RV32-NEXT: csrr a1, vlenb -; CHECK-RV32-NEXT: li a2, 24 -; CHECK-RV32-NEXT: mul a1, a1, a2 -; CHECK-RV32-NEXT: add a1, sp, a1 -; CHECK-RV32-NEXT: addi a1, a1, 16 -; CHECK-RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-RV32-NEXT: vrgather.vv v8, v24, v16, v0.t +; CHECK-RV32-NEXT: addi a1, sp, 16 +; CHECK-RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-RV32-NEXT: vmv.x.s a1, v0 ; CHECK-RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-RV32-NEXT: vslidedown.vi v7, v0, 2 +; CHECK-RV32-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-RV32-NEXT: zext.h a1, a1 ; CHECK-RV32-NEXT: cpop a1, a1 ; CHECK-RV32-NEXT: slli a1, a1, 3 ; CHECK-RV32-NEXT: add a0, a0, a1 ; CHECK-RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-RV32-NEXT: vcpop.m a1, v7 +; CHECK-RV32-NEXT: vcpop.m a1, v0 ; CHECK-RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-RV32-NEXT: vle64.v v16, (a0) ; CHECK-RV32-NEXT: csrr a0, vlenb @@ -1364,18 +1327,7 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV32-NEXT: addi a0, a0, 16 ; CHECK-RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; CHECK-RV32-NEXT: viota.m v24, v0 -; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: li a1, 24 -; CHECK-RV32-NEXT: mul a0, a0, a1 -; CHECK-RV32-NEXT: add a0, sp, a0 -; CHECK-RV32-NEXT: addi a0, a0, 16 -; CHECK-RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV32-NEXT: vrgather.vv v8, v16, v24, v0.t -; CHECK-RV32-NEXT: addi a0, sp, 16 -; CHECK-RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV32-NEXT: viota.m v8, v7 -; CHECK-RV32-NEXT: vmv1r.v v0, v7 +; CHECK-RV32-NEXT: viota.m v8, v0 ; CHECK-RV32-NEXT: csrr a0, vlenb ; CHECK-RV32-NEXT: slli a0, a0, 4 ; CHECK-RV32-NEXT: add a0, sp, a0 @@ -1390,7 +1342,8 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV32-NEXT: addi a0, sp, 16 ; CHECK-RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV32-NEXT: csrr a0, vlenb -; CHECK-RV32-NEXT: slli a0, a0, 5 +; CHECK-RV32-NEXT: li a1, 24 +; CHECK-RV32-NEXT: mul a0, a0, a1 ; CHECK-RV32-NEXT: add sp, sp, a0 ; CHECK-RV32-NEXT: .cfi_def_cfa sp, 16 ; CHECK-RV32-NEXT: addi sp, sp, 16 @@ -1402,33 +1355,34 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV64-NEXT: addi sp, sp, -16 ; CHECK-RV64-NEXT: .cfi_def_cfa_offset 16 ; CHECK-RV64-NEXT: csrr a1, vlenb -; CHECK-RV64-NEXT: slli a1, a1, 5 +; CHECK-RV64-NEXT: li a2, 24 +; CHECK-RV64-NEXT: mul a1, a1, a2 ; CHECK-RV64-NEXT: sub sp, sp, a1 -; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb +; CHECK-RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb ; CHECK-RV64-NEXT: csrr a1, vlenb ; CHECK-RV64-NEXT: slli a1, a1, 4 ; 
CHECK-RV64-NEXT: add a1, sp, a1 ; CHECK-RV64-NEXT: addi a1, a1, 16 ; CHECK-RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-RV64-NEXT: vcpop.m a1, v0 -; CHECK-RV64-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-RV64-NEXT: viota.m v16, v0 +; CHECK-RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-RV64-NEXT: vle64.v v24, (a0) -; CHECK-RV64-NEXT: csrr a1, vlenb -; CHECK-RV64-NEXT: li a2, 24 -; CHECK-RV64-NEXT: mul a1, a1, a2 -; CHECK-RV64-NEXT: add a1, sp, a1 -; CHECK-RV64-NEXT: addi a1, a1, 16 -; CHECK-RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu +; CHECK-RV64-NEXT: vrgather.vv v8, v24, v16, v0.t +; CHECK-RV64-NEXT: addi a1, sp, 16 +; CHECK-RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-RV64-NEXT: vmv.x.s a1, v0 ; CHECK-RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-RV64-NEXT: vslidedown.vi v7, v0, 2 +; CHECK-RV64-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-RV64-NEXT: zext.h a1, a1 ; CHECK-RV64-NEXT: cpopw a1, a1 ; CHECK-RV64-NEXT: slli a1, a1, 3 ; CHECK-RV64-NEXT: add a0, a0, a1 ; CHECK-RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-RV64-NEXT: vcpop.m a1, v7 +; CHECK-RV64-NEXT: vcpop.m a1, v0 ; CHECK-RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-RV64-NEXT: vle64.v v16, (a0) ; CHECK-RV64-NEXT: csrr a0, vlenb @@ -1437,18 +1391,7 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV64-NEXT: addi a0, a0, 16 ; CHECK-RV64-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, mu -; CHECK-RV64-NEXT: viota.m v24, v0 -; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: li a1, 24 -; CHECK-RV64-NEXT: mul a0, a0, a1 -; CHECK-RV64-NEXT: add a0, sp, a0 -; CHECK-RV64-NEXT: addi a0, a0, 16 -; CHECK-RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-RV64-NEXT: vrgather.vv v8, v16, v24, v0.t -; CHECK-RV64-NEXT: addi a0, sp, 16 -; CHECK-RV64-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-RV64-NEXT: viota.m v8, v7 -; CHECK-RV64-NEXT: vmv1r.v v0, v7 +; CHECK-RV64-NEXT: viota.m v8, v0 ; CHECK-RV64-NEXT: csrr a0, vlenb ; CHECK-RV64-NEXT: slli a0, a0, 4 ; CHECK-RV64-NEXT: add a0, sp, a0 @@ -1463,7 +1406,8 @@ define <32 x i64> @test_expandload_v32i64(ptr %base, <32 x i1> %mask, <32 x i64> ; CHECK-RV64-NEXT: addi a0, sp, 16 ; CHECK-RV64-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-RV64-NEXT: csrr a0, vlenb -; CHECK-RV64-NEXT: slli a0, a0, 5 +; CHECK-RV64-NEXT: li a1, 24 +; CHECK-RV64-NEXT: mul a0, a0, a1 ; CHECK-RV64-NEXT: add sp, sp, a0 ; CHECK-RV64-NEXT: .cfi_def_cfa sp, 16 ; CHECK-RV64-NEXT: addi sp, sp, 16 @@ -1491,13 +1435,12 @@ define <512 x i8> @test_expandload_v512i8(ptr %base, <512 x i1> %mask, <512 x i8 ; CHECK-LABEL: test_expandload_v512i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 512 -; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; CHECK-NEXT: viota.m v16, v0 ; CHECK-NEXT: vcpop.m a2, v0 ; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma ; CHECK-NEXT: vle8.v v12, (a0) -; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma -; CHECK-NEXT: viota.m v16, v0 -; CHECK-NEXT: vsetvli zero, zero, e8, m4, ta, mu +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, mu ; CHECK-NEXT: vrgatherei16.vv v8, v12, v16, v0.t ; CHECK-NEXT: ret %res = call <512 x i8> 
@llvm.masked.expandload.v512i8(ptr align 1 %base, <512 x i1> %mask, <512 x i8> %passthru) @@ -1630,12 +1573,12 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 28 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: .LBB61_30: # %else110 -; CHECK-RV32-NEXT: slli a2, a3, 2 ; CHECK-RV32-NEXT: li a1, 32 +; CHECK-RV32-NEXT: slli a2, a3, 2 ; CHECK-RV32-NEXT: bgez a2, .LBB61_32 ; CHECK-RV32-NEXT: # %bb.31: # %cond.load113 ; CHECK-RV32-NEXT: lbu a2, 0(a0) @@ -1643,13 +1586,13 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a2 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 29 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: .LBB61_32: # %else114 -; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vsrl.vx v16, v0, a1 +; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: bgez a2, .LBB61_34 ; CHECK-RV32-NEXT: # %bb.33: # %cond.load117 ; CHECK-RV32-NEXT: lbu a2, 0(a0) @@ -1657,8 +1600,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv.s.x v9, a2 ; CHECK-RV32-NEXT: vsetivli zero, 31, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 30 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: .LBB61_34: # %else118 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1793,13 +1736,13 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 61 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: .LBB61_66: # %else242 -; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 1 +; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: bgez a3, .LBB61_68 ; CHECK-RV32-NEXT: # %bb.67: # %cond.load245 ; CHECK-RV32-NEXT: lbu a3, 0(a0) @@ -1809,8 +1752,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 62 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: .LBB61_68: # %else246 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -1945,13 +1888,13 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 93 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: .LBB61_100: # %else370 -; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1 +; CHECK-RV32-NEXT: 
slli a2, a3, 1 ; CHECK-RV32-NEXT: bgez a2, .LBB61_102 ; CHECK-RV32-NEXT: # %bb.101: # %cond.load373 ; CHECK-RV32-NEXT: lbu a2, 0(a0) @@ -1961,8 +1904,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 94 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: .LBB61_102: # %else374 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2097,13 +2040,13 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 125 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: .LBB61_134: # %else498 -; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 2 +; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: bgez a3, .LBB61_136 ; CHECK-RV32-NEXT: # %bb.135: # %cond.load501 ; CHECK-RV32-NEXT: lbu a3, 0(a0) @@ -2113,8 +2056,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 126 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: .LBB61_136: # %else502 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2249,13 +2192,13 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 157 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: .LBB61_168: # %else626 -; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1 +; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: bgez a2, .LBB61_170 ; CHECK-RV32-NEXT: # %bb.169: # %cond.load629 ; CHECK-RV32-NEXT: lbu a2, 0(a0) @@ -2265,8 +2208,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 158 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: .LBB61_170: # %else630 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2401,13 +2344,13 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 189 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: .LBB61_202: # %else754 -; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 3 +; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: bgez a3, .LBB61_204 ; CHECK-RV32-NEXT: # 
%bb.203: # %cond.load757 ; CHECK-RV32-NEXT: lbu a3, 0(a0) @@ -2417,8 +2360,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 190 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: .LBB61_204: # %else758 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2553,13 +2496,13 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 221 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: .LBB61_236: # %else882 -; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1 +; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: bgez a2, .LBB61_238 ; CHECK-RV32-NEXT: # %bb.237: # %cond.load885 ; CHECK-RV32-NEXT: lbu a2, 0(a0) @@ -2569,8 +2512,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 222 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: .LBB61_238: # %else886 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2705,13 +2648,13 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 253 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: .LBB61_270: # %else1010 -; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 4 +; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: bgez a3, .LBB61_272 ; CHECK-RV32-NEXT: # %bb.271: # %cond.load1013 ; CHECK-RV32-NEXT: lbu a3, 0(a0) @@ -2721,8 +2664,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 254 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: .LBB61_272: # %else1014 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2859,9 +2802,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4 ; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: .LBB61_304: # %else1138 -; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1 +; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: bgez a2, .LBB61_306 ; CHECK-RV32-NEXT: # %bb.305: # %cond.load1141 ; CHECK-RV32-NEXT: lbu a2, 0(a0) @@ -3006,9 +2949,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4 ; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: 
.LBB61_338: # %else1266 -; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 5 +; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: bgez a3, .LBB61_340 ; CHECK-RV32-NEXT: # %bb.339: # %cond.load1269 ; CHECK-RV32-NEXT: lbu a3, 0(a0) @@ -3153,9 +3096,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4 ; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: .LBB61_372: # %else1394 -; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1 +; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: bgez a2, .LBB61_374 ; CHECK-RV32-NEXT: # %bb.373: # %cond.load1397 ; CHECK-RV32-NEXT: lbu a2, 0(a0) @@ -3300,9 +3243,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4 ; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: .LBB61_406: # %else1522 -; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 6 +; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: bgez a3, .LBB61_408 ; CHECK-RV32-NEXT: # %bb.407: # %cond.load1525 ; CHECK-RV32-NEXT: lbu a3, 0(a0) @@ -3447,9 +3390,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4 ; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: .LBB61_440: # %else1650 -; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1 +; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: bgez a2, .LBB61_442 ; CHECK-RV32-NEXT: # %bb.441: # %cond.load1653 ; CHECK-RV32-NEXT: lbu a2, 0(a0) @@ -3594,9 +3537,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vslideup.vx v8, v16, a4 ; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: .LBB61_474: # %else1778 -; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vslidedown.vi v16, v0, 7 +; CHECK-RV32-NEXT: slli a3, a2, 1 ; CHECK-RV32-NEXT: bgez a3, .LBB61_476 ; CHECK-RV32-NEXT: # %bb.475: # %cond.load1781 ; CHECK-RV32-NEXT: lbu a3, 0(a0) @@ -3741,10 +3684,10 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vslideup.vx v8, v24, a4 ; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: .LBB61_508: # %else1906 -; CHECK-RV32-NEXT: slli a2, a3, 1 ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV32-NEXT: vsrl.vx v16, v16, a1 -; CHECK-RV32-NEXT: bgez a2, .LBB61_510 +; CHECK-RV32-NEXT: slli a1, a3, 1 +; CHECK-RV32-NEXT: bgez a1, .LBB61_510 ; CHECK-RV32-NEXT: # %bb.509: # %cond.load1909 ; CHECK-RV32-NEXT: lbu a1, 0(a0) ; CHECK-RV32-NEXT: vmv.s.x v24, a1 @@ -3892,8 +3835,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vsetvli zero, zero, e8, mf8, tu, ma ; CHECK-RV32-NEXT: vmv.s.x v8, a1 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a1, a3, 2 ; CHECK-RV32-NEXT: bnez a1, .LBB61_545 @@ -3904,8 +3847,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: 
vslideup.vi v8, v9, 1 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a1, a3, 4 ; CHECK-RV32-NEXT: bnez a1, .LBB61_546 @@ -3916,8 +3859,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 2 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a1, a3, 8 ; CHECK-RV32-NEXT: bnez a1, .LBB61_547 @@ -3928,8 +3871,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 3 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a1, a3, 16 ; CHECK-RV32-NEXT: bnez a1, .LBB61_548 @@ -3940,8 +3883,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a1, a3, 32 ; CHECK-RV32-NEXT: bnez a1, .LBB61_549 @@ -3952,8 +3895,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 5 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a1, a3, 64 ; CHECK-RV32-NEXT: bnez a1, .LBB61_550 @@ -3964,8 +3907,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 6 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a1, a3, 128 ; CHECK-RV32-NEXT: bnez a1, .LBB61_551 @@ -3976,8 +3919,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 7 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a1, a3, 256 ; CHECK-RV32-NEXT: bnez a1, .LBB61_552 @@ -3988,8 +3931,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 8 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a1, a3, 512 ; CHECK-RV32-NEXT: bnez a1, .LBB61_553 @@ -4000,8 +3943,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 9 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; 
CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a1, a3, 1024 ; CHECK-RV32-NEXT: bnez a1, .LBB61_554 @@ -4012,8 +3955,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 10 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 20 ; CHECK-RV32-NEXT: bltz a1, .LBB61_555 @@ -4024,8 +3967,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 11 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 19 ; CHECK-RV32-NEXT: bltz a1, .LBB61_556 @@ -4036,8 +3979,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 12 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 18 ; CHECK-RV32-NEXT: bltz a1, .LBB61_557 @@ -4048,8 +3991,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 13 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 17 ; CHECK-RV32-NEXT: bltz a1, .LBB61_558 @@ -4060,8 +4003,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 14 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 16 ; CHECK-RV32-NEXT: bltz a1, .LBB61_559 @@ -4072,8 +4015,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 15 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 15 ; CHECK-RV32-NEXT: bltz a1, .LBB61_560 @@ -4084,8 +4027,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 16 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 14 ; CHECK-RV32-NEXT: bltz a1, .LBB61_561 @@ -4096,8 +4039,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 17 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 13 ; CHECK-RV32-NEXT: bltz a1, .LBB61_562 @@ -4108,8 +4051,8 @@ 
define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 18 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 12 ; CHECK-RV32-NEXT: bltz a1, .LBB61_563 @@ -4120,8 +4063,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 19 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 11 ; CHECK-RV32-NEXT: bltz a1, .LBB61_564 @@ -4132,8 +4075,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 20 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 10 ; CHECK-RV32-NEXT: bltz a1, .LBB61_565 @@ -4144,8 +4087,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 21 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 9 ; CHECK-RV32-NEXT: bltz a1, .LBB61_566 @@ -4156,8 +4099,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 22 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 8 ; CHECK-RV32-NEXT: bltz a1, .LBB61_567 @@ -4168,8 +4111,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 23 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 7 ; CHECK-RV32-NEXT: bltz a1, .LBB61_568 @@ -4180,8 +4123,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 24 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 6 ; CHECK-RV32-NEXT: bltz a1, .LBB61_569 @@ -4192,8 +4135,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 25 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 5 ; CHECK-RV32-NEXT: bltz a1, .LBB61_570 @@ -4204,8 +4147,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: 
vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 26 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 4 ; CHECK-RV32-NEXT: bltz a1, .LBB61_571 @@ -4216,8 +4159,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: vmv8r.v v16, v8 ; CHECK-RV32-NEXT: vmv.s.x v9, a1 ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 27 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a1, a3, 3 ; CHECK-RV32-NEXT: bgez a1, .LBB61_1025 @@ -4231,8 +4174,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a3, 32 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vi v8, v9, 31 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 1 ; CHECK-RV32-NEXT: bnez a3, .LBB61_573 @@ -4246,8 +4189,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 32 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 2 ; CHECK-RV32-NEXT: bnez a3, .LBB61_574 @@ -4261,8 +4204,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 33 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 4 ; CHECK-RV32-NEXT: bnez a3, .LBB61_575 @@ -4276,8 +4219,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 34 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 8 ; CHECK-RV32-NEXT: bnez a3, .LBB61_576 @@ -4291,8 +4234,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 35 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 16 ; CHECK-RV32-NEXT: bnez a3, .LBB61_577 @@ -4306,8 +4249,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 36 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 32 ; CHECK-RV32-NEXT: bnez a3, .LBB61_578 @@ -4321,8 +4264,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 37 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, 
v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 64 ; CHECK-RV32-NEXT: bnez a3, .LBB61_579 @@ -4336,8 +4279,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 38 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 128 ; CHECK-RV32-NEXT: bnez a3, .LBB61_580 @@ -4351,8 +4294,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 39 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 256 ; CHECK-RV32-NEXT: bnez a3, .LBB61_581 @@ -4366,8 +4309,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 40 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 512 ; CHECK-RV32-NEXT: bnez a3, .LBB61_582 @@ -4381,8 +4324,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 41 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 1024 ; CHECK-RV32-NEXT: bnez a3, .LBB61_583 @@ -4396,8 +4339,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 42 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 20 ; CHECK-RV32-NEXT: bltz a3, .LBB61_584 @@ -4411,8 +4354,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 43 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 19 ; CHECK-RV32-NEXT: bltz a3, .LBB61_585 @@ -4426,8 +4369,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 44 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 18 ; CHECK-RV32-NEXT: bltz a3, .LBB61_586 @@ -4441,8 +4384,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 45 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: 
addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 17 ; CHECK-RV32-NEXT: bltz a3, .LBB61_587 @@ -4456,8 +4399,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 46 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 16 ; CHECK-RV32-NEXT: bltz a3, .LBB61_588 @@ -4471,8 +4414,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 47 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 15 ; CHECK-RV32-NEXT: bltz a3, .LBB61_589 @@ -4486,8 +4429,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 48 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 14 ; CHECK-RV32-NEXT: bltz a3, .LBB61_590 @@ -4501,8 +4444,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 49 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 13 ; CHECK-RV32-NEXT: bltz a3, .LBB61_591 @@ -4516,8 +4459,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 50 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 12 ; CHECK-RV32-NEXT: bltz a3, .LBB61_592 @@ -4531,8 +4474,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 51 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 11 ; CHECK-RV32-NEXT: bltz a3, .LBB61_593 @@ -4546,8 +4489,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 52 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 10 ; CHECK-RV32-NEXT: bltz a3, .LBB61_594 @@ -4561,8 +4504,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 53 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: 
vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 9 ; CHECK-RV32-NEXT: bltz a3, .LBB61_595 @@ -4576,8 +4519,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 54 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 8 ; CHECK-RV32-NEXT: bltz a3, .LBB61_596 @@ -4591,8 +4534,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 55 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 7 ; CHECK-RV32-NEXT: bltz a3, .LBB61_597 @@ -4606,8 +4549,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 56 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 6 ; CHECK-RV32-NEXT: bltz a3, .LBB61_598 @@ -4621,8 +4564,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 57 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 5 ; CHECK-RV32-NEXT: bltz a3, .LBB61_599 @@ -4636,8 +4579,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 58 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 4 ; CHECK-RV32-NEXT: bltz a3, .LBB61_600 @@ -4651,8 +4594,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 59 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 3 ; CHECK-RV32-NEXT: bltz a3, .LBB61_601 @@ -4666,8 +4609,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 60 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 2 ; CHECK-RV32-NEXT: bgez a3, .LBB61_1026 @@ -4682,8 +4625,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 63 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m1, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v9, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv1r.v v24, v8 +; CHECK-RV32-NEXT: addi 
a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 1 ; CHECK-RV32-NEXT: bnez a2, .LBB61_603 @@ -4697,8 +4640,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 64 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 2 ; CHECK-RV32-NEXT: bnez a2, .LBB61_604 @@ -4712,8 +4655,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 65 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 4 ; CHECK-RV32-NEXT: bnez a2, .LBB61_605 @@ -4727,8 +4670,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 66 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 8 ; CHECK-RV32-NEXT: bnez a2, .LBB61_606 @@ -4742,8 +4685,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 67 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 16 ; CHECK-RV32-NEXT: bnez a2, .LBB61_607 @@ -4757,8 +4700,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 68 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 32 ; CHECK-RV32-NEXT: bnez a2, .LBB61_608 @@ -4772,8 +4715,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 69 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 64 ; CHECK-RV32-NEXT: bnez a2, .LBB61_609 @@ -4787,8 +4730,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 70 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 128 ; CHECK-RV32-NEXT: bnez a2, .LBB61_610 @@ -4802,8 +4745,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 71 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: 
vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 256 ; CHECK-RV32-NEXT: bnez a2, .LBB61_611 @@ -4817,8 +4760,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 72 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 512 ; CHECK-RV32-NEXT: bnez a2, .LBB61_612 @@ -4832,8 +4775,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 73 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 1024 ; CHECK-RV32-NEXT: bnez a2, .LBB61_613 @@ -4847,8 +4790,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 74 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 20 ; CHECK-RV32-NEXT: bltz a2, .LBB61_614 @@ -4862,8 +4805,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 75 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 19 ; CHECK-RV32-NEXT: bltz a2, .LBB61_615 @@ -4877,8 +4820,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 76 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 18 ; CHECK-RV32-NEXT: bltz a2, .LBB61_616 @@ -4892,8 +4835,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 77 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 17 ; CHECK-RV32-NEXT: bltz a2, .LBB61_617 @@ -4907,8 +4850,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 78 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 16 ; CHECK-RV32-NEXT: bltz a2, .LBB61_618 @@ -4922,8 +4865,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 79 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; 
CHECK-RV32-NEXT: slli a2, a3, 15 ; CHECK-RV32-NEXT: bltz a2, .LBB61_619 @@ -4937,8 +4880,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 80 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 14 ; CHECK-RV32-NEXT: bltz a2, .LBB61_620 @@ -4952,8 +4895,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 81 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 13 ; CHECK-RV32-NEXT: bltz a2, .LBB61_621 @@ -4967,8 +4910,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 82 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 12 ; CHECK-RV32-NEXT: bltz a2, .LBB61_622 @@ -4982,8 +4925,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 83 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 11 ; CHECK-RV32-NEXT: bltz a2, .LBB61_623 @@ -4997,8 +4940,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 84 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 10 ; CHECK-RV32-NEXT: bltz a2, .LBB61_624 @@ -5012,8 +4955,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 85 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 9 ; CHECK-RV32-NEXT: bltz a2, .LBB61_625 @@ -5027,8 +4970,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 86 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 8 ; CHECK-RV32-NEXT: bltz a2, .LBB61_626 @@ -5042,8 +4985,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 87 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, 
a3, 7 ; CHECK-RV32-NEXT: bltz a2, .LBB61_627 @@ -5057,8 +5000,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 88 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 6 ; CHECK-RV32-NEXT: bltz a2, .LBB61_628 @@ -5072,8 +5015,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 89 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 5 ; CHECK-RV32-NEXT: bltz a2, .LBB61_629 @@ -5087,8 +5030,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 90 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 4 ; CHECK-RV32-NEXT: bltz a2, .LBB61_630 @@ -5102,8 +5045,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 91 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 3 ; CHECK-RV32-NEXT: bltz a2, .LBB61_631 @@ -5117,8 +5060,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 92 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 2 ; CHECK-RV32-NEXT: bgez a2, .LBB61_1027 @@ -5133,8 +5076,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 95 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 1 ; CHECK-RV32-NEXT: bnez a3, .LBB61_633 @@ -5148,8 +5091,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 96 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 2 ; CHECK-RV32-NEXT: bnez a3, .LBB61_634 @@ -5163,8 +5106,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 97 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 4 ; CHECK-RV32-NEXT: bnez a3, 
.LBB61_635 @@ -5178,8 +5121,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 98 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 8 ; CHECK-RV32-NEXT: bnez a3, .LBB61_636 @@ -5193,8 +5136,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 99 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 16 ; CHECK-RV32-NEXT: bnez a3, .LBB61_637 @@ -5208,8 +5151,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 100 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 32 ; CHECK-RV32-NEXT: bnez a3, .LBB61_638 @@ -5223,8 +5166,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 101 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 64 ; CHECK-RV32-NEXT: bnez a3, .LBB61_639 @@ -5238,8 +5181,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 102 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 128 ; CHECK-RV32-NEXT: bnez a3, .LBB61_640 @@ -5253,8 +5196,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 103 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 256 ; CHECK-RV32-NEXT: bnez a3, .LBB61_641 @@ -5268,8 +5211,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 104 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 512 ; CHECK-RV32-NEXT: bnez a3, .LBB61_642 @@ -5283,8 +5226,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 105 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 1024 ; CHECK-RV32-NEXT: bnez a3, .LBB61_643 @@ 
-5298,8 +5241,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 106 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 20 ; CHECK-RV32-NEXT: bltz a3, .LBB61_644 @@ -5313,8 +5256,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 107 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 19 ; CHECK-RV32-NEXT: bltz a3, .LBB61_645 @@ -5328,8 +5271,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 108 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 18 ; CHECK-RV32-NEXT: bltz a3, .LBB61_646 @@ -5343,8 +5286,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 109 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 17 ; CHECK-RV32-NEXT: bltz a3, .LBB61_647 @@ -5358,8 +5301,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 110 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 16 ; CHECK-RV32-NEXT: bltz a3, .LBB61_648 @@ -5373,8 +5316,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 111 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 15 ; CHECK-RV32-NEXT: bltz a3, .LBB61_649 @@ -5388,8 +5331,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 112 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 14 ; CHECK-RV32-NEXT: bltz a3, .LBB61_650 @@ -5403,8 +5346,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 113 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 13 ; CHECK-RV32-NEXT: bltz a3, .LBB61_651 @@ -5418,8 +5361,8 @@ 
define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 114 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 12 ; CHECK-RV32-NEXT: bltz a3, .LBB61_652 @@ -5433,8 +5376,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 115 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 11 ; CHECK-RV32-NEXT: bltz a3, .LBB61_653 @@ -5448,8 +5391,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 116 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 10 ; CHECK-RV32-NEXT: bltz a3, .LBB61_654 @@ -5463,8 +5406,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 117 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 9 ; CHECK-RV32-NEXT: bltz a3, .LBB61_655 @@ -5478,8 +5421,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 118 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 8 ; CHECK-RV32-NEXT: bltz a3, .LBB61_656 @@ -5493,8 +5436,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 119 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 7 ; CHECK-RV32-NEXT: bltz a3, .LBB61_657 @@ -5508,8 +5451,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 120 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 6 ; CHECK-RV32-NEXT: bltz a3, .LBB61_658 @@ -5523,8 +5466,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 121 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 5 ; CHECK-RV32-NEXT: bltz a3, .LBB61_659 @@ -5538,8 +5481,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 122 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 4 ; CHECK-RV32-NEXT: bltz a3, .LBB61_660 @@ -5553,8 +5496,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 123 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 3 ; CHECK-RV32-NEXT: bltz a3, .LBB61_661 @@ -5568,8 +5511,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 124 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 2 ; CHECK-RV32-NEXT: bgez a3, .LBB61_1028 @@ -5584,8 +5527,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 127 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v10, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv2r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 1 ; CHECK-RV32-NEXT: bnez a2, .LBB61_663 @@ -5599,8 +5542,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 128 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 2 ; CHECK-RV32-NEXT: bnez a2, .LBB61_664 @@ -5614,8 +5557,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 129 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 4 ; CHECK-RV32-NEXT: bnez a2, .LBB61_665 @@ -5629,8 +5572,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 130 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 8 ; CHECK-RV32-NEXT: bnez a2, .LBB61_666 @@ -5644,8 +5587,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 131 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 16 ; CHECK-RV32-NEXT: bnez a2, .LBB61_667 @@ -5659,8 +5602,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 132 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 32 ; CHECK-RV32-NEXT: bnez a2, .LBB61_668 @@ -5674,8 +5617,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 133 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 64 ; CHECK-RV32-NEXT: bnez a2, .LBB61_669 @@ -5689,8 +5632,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 134 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 128 ; CHECK-RV32-NEXT: bnez a2, .LBB61_670 @@ -5704,8 +5647,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 135 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 256 ; CHECK-RV32-NEXT: bnez a2, .LBB61_671 @@ -5719,8 +5662,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 136 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 512 ; CHECK-RV32-NEXT: bnez a2, .LBB61_672 @@ -5734,8 +5677,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 137 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 1024 ; CHECK-RV32-NEXT: bnez a2, .LBB61_673 @@ -5749,8 +5692,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 138 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 20 ; CHECK-RV32-NEXT: bltz a2, .LBB61_674 @@ -5764,8 +5707,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 139 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 19 ; CHECK-RV32-NEXT: bltz a2, .LBB61_675 @@ -5779,8 +5722,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 140 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 18 ; CHECK-RV32-NEXT: bltz a2, .LBB61_676 @@ -5794,8 +5737,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 141 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 17 ; CHECK-RV32-NEXT: bltz a2, .LBB61_677 @@ -5809,8 +5752,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 142 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 16 ; CHECK-RV32-NEXT: bltz a2, .LBB61_678 @@ -5824,8 +5767,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 143 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 15 ; CHECK-RV32-NEXT: bltz a2, .LBB61_679 @@ -5839,8 +5782,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 144 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 14 ; CHECK-RV32-NEXT: bltz a2, .LBB61_680 @@ -5854,8 +5797,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 145 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 13 ; CHECK-RV32-NEXT: bltz a2, .LBB61_681 @@ -5869,8 +5812,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 146 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 12 ; CHECK-RV32-NEXT: bltz a2, .LBB61_682 @@ -5884,8 +5827,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 147 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 11 ; CHECK-RV32-NEXT: bltz a2, .LBB61_683 @@ -5899,8 +5842,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 148 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 10 ; CHECK-RV32-NEXT: bltz a2, .LBB61_684 @@ -5914,8 +5857,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 149 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 9 ; CHECK-RV32-NEXT: bltz a2, .LBB61_685 @@ -5929,8 +5872,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 150 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 8 ; CHECK-RV32-NEXT: bltz a2, .LBB61_686 @@ -5944,8 +5887,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 151 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 7 ; CHECK-RV32-NEXT: bltz a2, .LBB61_687 @@ -5959,8 +5902,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 152 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 6 ; CHECK-RV32-NEXT: bltz a2, .LBB61_688 @@ -5974,8 +5917,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 153 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 5 ; CHECK-RV32-NEXT: bltz a2, .LBB61_689 @@ -5989,8 +5932,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 154 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 4 ; CHECK-RV32-NEXT: bltz a2, .LBB61_690 @@ -6004,8 +5947,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 155 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 3 ; CHECK-RV32-NEXT: bltz a2, .LBB61_691 @@ -6019,8 +5962,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 156 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 2 ; CHECK-RV32-NEXT: bgez a2, .LBB61_1029 @@ -6035,8 +5978,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 159 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 1 ; CHECK-RV32-NEXT: bnez a3, .LBB61_693 @@ -6050,8 +5993,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 160 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 2 ; CHECK-RV32-NEXT: bnez a3, .LBB61_694 @@ -6065,8 +6008,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 161 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 4 ; CHECK-RV32-NEXT: bnez a3, .LBB61_695 @@ -6080,8 +6023,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 162 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 8 ; CHECK-RV32-NEXT: bnez a3, .LBB61_696 @@ -6095,8 +6038,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 163 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 16 ; CHECK-RV32-NEXT: bnez a3, .LBB61_697 @@ -6110,8 +6053,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 164 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 32 ; CHECK-RV32-NEXT: bnez a3, .LBB61_698 @@ -6125,8 +6068,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 165 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 64 ; CHECK-RV32-NEXT: bnez a3, .LBB61_699 @@ -6140,8 +6083,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 166 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 128 ; CHECK-RV32-NEXT: bnez a3, .LBB61_700 @@ -6155,8 +6098,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 167 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 256 ; CHECK-RV32-NEXT: bnez a3, .LBB61_701 @@ -6170,8 +6113,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 168 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 512 ; CHECK-RV32-NEXT: bnez a3, .LBB61_702 @@ -6185,8 +6128,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 169 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 1024 ; CHECK-RV32-NEXT: bnez a3, .LBB61_703 @@ -6200,8 +6143,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 170 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 20 ; CHECK-RV32-NEXT: bltz a3, .LBB61_704 @@ -6215,8 +6158,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 171 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 19 ; CHECK-RV32-NEXT: bltz a3, .LBB61_705 @@ -6230,8 +6173,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 172 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 18 ; CHECK-RV32-NEXT: bltz a3, .LBB61_706 @@ -6245,8 +6188,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 173 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 17 ; CHECK-RV32-NEXT: bltz a3, .LBB61_707 @@ -6260,8 +6203,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 174 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 16 ; CHECK-RV32-NEXT: bltz a3, .LBB61_708 @@ -6275,8 +6218,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 175 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 15 ; CHECK-RV32-NEXT: bltz a3, .LBB61_709 @@ -6290,8 +6233,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 176 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 14 ; CHECK-RV32-NEXT: bltz a3, .LBB61_710 @@ -6305,8 +6248,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 177 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 13 ; CHECK-RV32-NEXT: bltz a3, .LBB61_711 @@ -6320,8 +6263,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 178 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 12 ; CHECK-RV32-NEXT: bltz a3, .LBB61_712 @@ -6335,8 +6278,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 179 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 11 ; CHECK-RV32-NEXT: bltz a3, .LBB61_713 @@ -6350,8 +6293,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 180 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 10 ; CHECK-RV32-NEXT: bltz a3, .LBB61_714 @@ -6365,8 +6308,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 181 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 9 ; CHECK-RV32-NEXT: bltz a3, .LBB61_715 @@ -6380,8 +6323,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 182 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 8 ; CHECK-RV32-NEXT: bltz a3, .LBB61_716 @@ -6395,8 +6338,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 183 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 7 ; CHECK-RV32-NEXT: bltz a3, .LBB61_717 @@ -6410,8 +6353,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 184 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 6 ; CHECK-RV32-NEXT: bltz a3, .LBB61_718 @@ -6425,8 +6368,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 185 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 5 ; CHECK-RV32-NEXT: bltz a3, .LBB61_719 @@ -6440,8 +6383,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 186 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 4 ; CHECK-RV32-NEXT: bltz a3, .LBB61_720 @@ -6455,8 +6398,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 187 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 3 ; CHECK-RV32-NEXT: bltz a3, .LBB61_721 @@ -6470,8 +6413,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 188 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 2 ; CHECK-RV32-NEXT: bgez a3, .LBB61_1030 @@ -6486,8 +6429,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 191 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 1 ; CHECK-RV32-NEXT: bnez a2, .LBB61_723 @@ -6501,8 +6444,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 192 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 2 ; CHECK-RV32-NEXT: bnez a2, .LBB61_724 @@ -6516,8 +6459,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 193 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 4 ; CHECK-RV32-NEXT: bnez a2, .LBB61_725 @@ -6531,8 +6474,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 194 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 8 ; CHECK-RV32-NEXT: bnez a2, .LBB61_726 @@ -6546,8 +6489,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 195 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 16 ; CHECK-RV32-NEXT: bnez a2, .LBB61_727 @@ -6561,8 +6504,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 196 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 32 ; CHECK-RV32-NEXT: bnez a2, .LBB61_728 @@ -6576,8 +6519,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 197 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 64 ; CHECK-RV32-NEXT: bnez a2, .LBB61_729 @@ -6591,8 +6534,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 198 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 128 ; CHECK-RV32-NEXT: bnez a2, .LBB61_730 @@ -6606,8 +6549,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 199 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 256 ; CHECK-RV32-NEXT: bnez a2, .LBB61_731 @@ -6621,8 +6564,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 200 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 512 ; CHECK-RV32-NEXT: bnez a2, .LBB61_732 @@ -6636,8 +6579,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 201 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 1024 ; CHECK-RV32-NEXT: bnez a2, .LBB61_733 @@ -6651,8 +6594,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 202 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 20 ; CHECK-RV32-NEXT: bltz a2, .LBB61_734 @@ -6666,8 +6609,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 203 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 19 ; CHECK-RV32-NEXT: bltz a2, .LBB61_735 @@ -6681,8 +6624,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 204 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 18 ; CHECK-RV32-NEXT: bltz a2, .LBB61_736 @@ -6696,8 +6639,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 205 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 17 ; CHECK-RV32-NEXT: bltz a2, .LBB61_737 @@ -6711,8 +6654,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 206 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 16 ; CHECK-RV32-NEXT: bltz a2, .LBB61_738 @@ -6726,8 +6669,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 207 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 15 ; CHECK-RV32-NEXT: bltz a2, .LBB61_739 @@ -6741,8 +6684,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 208 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 14 ; CHECK-RV32-NEXT: bltz a2, .LBB61_740 @@ -6756,8 +6699,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 209 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 13 ; CHECK-RV32-NEXT: bltz a2, .LBB61_741 @@ -6771,8 +6714,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 210 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 12 ; CHECK-RV32-NEXT: bltz a2, .LBB61_742 @@ -6786,8 +6729,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 211 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 11 ; CHECK-RV32-NEXT: bltz a2, .LBB61_743 @@ -6801,8 +6744,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 212 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 10 ; CHECK-RV32-NEXT: bltz a2, .LBB61_744 @@ -6816,8 +6759,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 213 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 9 ; CHECK-RV32-NEXT: bltz a2, .LBB61_745 @@ -6831,8 +6774,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 214 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 8 ; CHECK-RV32-NEXT: bltz a2, .LBB61_746 @@ -6846,8 +6789,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 215 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 7 ; CHECK-RV32-NEXT: bltz a2, .LBB61_747 @@ -6861,8 +6804,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 216 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 6 ; CHECK-RV32-NEXT: bltz a2, .LBB61_748 @@ -6876,8 +6819,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 217 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 5 ; CHECK-RV32-NEXT: bltz a2, .LBB61_749 @@ -6891,8 +6834,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 218 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 4 ; CHECK-RV32-NEXT: bltz a2, .LBB61_750 @@ -6906,8 +6849,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 219 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 3 ; CHECK-RV32-NEXT: bltz a2, .LBB61_751 @@ -6921,8 +6864,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 220 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: slli a2, a3, 2 ; CHECK-RV32-NEXT: bgez a2, .LBB61_1031 @@ -6937,8 +6880,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 223 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 1 ; CHECK-RV32-NEXT: bnez a3, .LBB61_753 @@ -6952,8 +6895,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 224 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 2 ; CHECK-RV32-NEXT: bnez a3, .LBB61_754 @@ -6967,8 +6910,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 225 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 4 ; CHECK-RV32-NEXT: bnez a3, .LBB61_755 @@ -6982,8 +6925,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 226 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 8 ; CHECK-RV32-NEXT: bnez a3, .LBB61_756 @@ -6997,8 +6940,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 227 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 16 ; CHECK-RV32-NEXT: bnez a3, .LBB61_757 @@ -7012,8 +6955,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 228 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 32 ; CHECK-RV32-NEXT: bnez a3, .LBB61_758 @@ -7027,8 +6970,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 229 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 64 ; CHECK-RV32-NEXT: bnez a3, .LBB61_759 @@ -7042,8 +6985,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 230 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 128 ; CHECK-RV32-NEXT: bnez a3, .LBB61_760 @@ -7057,8 +7000,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 231 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 256 ; CHECK-RV32-NEXT: bnez a3, .LBB61_761 @@ -7072,8 +7015,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 232 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 512 ; CHECK-RV32-NEXT: bnez a3, .LBB61_762 @@ -7087,8 +7030,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 233 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: andi a3, a2, 1024 ; CHECK-RV32-NEXT: bnez a3, .LBB61_763 @@ -7102,8 +7045,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 234 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 20 ; CHECK-RV32-NEXT: bltz a3, .LBB61_764 @@ -7117,8 +7060,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 235 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 19 ; CHECK-RV32-NEXT: bltz a3, .LBB61_765 @@ -7132,8 +7075,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 236 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 18 ; CHECK-RV32-NEXT: bltz a3, .LBB61_766 @@ -7147,8 +7090,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 237 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 17 ; CHECK-RV32-NEXT: bltz a3, .LBB61_767 @@ -7162,8 +7105,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 238 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 16 ; CHECK-RV32-NEXT: bltz a3, .LBB61_768 @@ -7177,8 +7120,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 239 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 15 ; CHECK-RV32-NEXT: bltz a3, .LBB61_769 @@ -7192,8 +7135,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 240 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 14 ; CHECK-RV32-NEXT: bltz a3, .LBB61_770 @@ -7207,8 +7150,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 241 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 13 ; CHECK-RV32-NEXT: bltz a3, .LBB61_771 @@ -7222,8 +7165,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 242 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 12 ; CHECK-RV32-NEXT: bltz a3, .LBB61_772 @@ -7237,8 +7180,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 243 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 11 ; CHECK-RV32-NEXT: bltz a3, .LBB61_773 @@ -7252,8 +7195,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 244 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 10 ; CHECK-RV32-NEXT: bltz a3, .LBB61_774 @@ -7267,8 +7210,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 245 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 9 ; CHECK-RV32-NEXT: bltz a3, .LBB61_775 @@ -7282,8 +7225,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 246 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 8 ; CHECK-RV32-NEXT: bltz a3, .LBB61_776 @@ -7297,8 +7240,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 247 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 7 ; CHECK-RV32-NEXT: bltz a3, .LBB61_777 @@ -7312,8 +7255,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 248 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 6 ; CHECK-RV32-NEXT: bltz a3, .LBB61_778 @@ -7327,8 +7270,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 249 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 5 ; CHECK-RV32-NEXT: bltz a3, .LBB61_779 @@ -7342,8 +7285,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 250 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 4 ; CHECK-RV32-NEXT: bltz a3, .LBB61_780 @@ -7357,8 +7300,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 251 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 3 ; CHECK-RV32-NEXT: bltz a3, .LBB61_781 @@ -7372,8 +7315,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 252 ; CHECK-RV32-NEXT: vsetvli zero, a3, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v16, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v16 ; CHECK-RV32-NEXT: slli a3, a2, 2 ; CHECK-RV32-NEXT: bgez a3, .LBB61_1032 @@ -7388,8 +7331,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV32-NEXT: li a4, 255 ; CHECK-RV32-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV32-NEXT: vslideup.vx v8, v12, a4 -; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv4r.v v24, v8 +; CHECK-RV32-NEXT: addi a0, a0, 1 ; CHECK-RV32-NEXT: vmv8r.v v8, v24 ; CHECK-RV32-NEXT: andi a2, a3, 1 ; CHECK-RV32-NEXT: bnez a2, .LBB61_783 @@ -10794,13 +10737,13 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 61 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: .LBB61_63: # %else242 -; CHECK-RV64-NEXT: slli a1, a2, 1 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 1 +; CHECK-RV64-NEXT: slli a1, a2, 1 ; CHECK-RV64-NEXT: bgez a1, .LBB61_65 ; CHECK-RV64-NEXT: # %bb.64: # %cond.load245 ; CHECK-RV64-NEXT: lbu a1, 0(a0) @@ -10810,8 +10753,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 62 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v24, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v24 ; CHECK-RV64-NEXT: .LBB61_65: # %else246 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -11074,13 +11017,13 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 125 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: .LBB61_129: # %else498 -; CHECK-RV64-NEXT: slli a2, a1, 1 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 2 +; CHECK-RV64-NEXT: slli a2, a1, 1 ; CHECK-RV64-NEXT: bgez a2, .LBB61_131 ; CHECK-RV64-NEXT: # %bb.130: # %cond.load501 ; 
CHECK-RV64-NEXT: lbu a2, 0(a0) @@ -11090,8 +11033,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 126 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v24, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v24 ; CHECK-RV64-NEXT: .LBB61_131: # %else502 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -11354,13 +11297,13 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 189 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: .LBB61_195: # %else754 -; CHECK-RV64-NEXT: slli a1, a2, 1 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 3 +; CHECK-RV64-NEXT: slli a1, a2, 1 ; CHECK-RV64-NEXT: bgez a1, .LBB61_197 ; CHECK-RV64-NEXT: # %bb.196: # %cond.load757 ; CHECK-RV64-NEXT: lbu a1, 0(a0) @@ -11370,8 +11313,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 190 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v24, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v24 ; CHECK-RV64-NEXT: .LBB61_197: # %else758 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -11634,13 +11577,13 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 253 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: .LBB61_261: # %else1010 -; CHECK-RV64-NEXT: slli a2, a1, 1 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 4 +; CHECK-RV64-NEXT: slli a2, a1, 1 ; CHECK-RV64-NEXT: bgez a2, .LBB61_263 ; CHECK-RV64-NEXT: # %bb.262: # %cond.load1013 ; CHECK-RV64-NEXT: lbu a2, 0(a0) @@ -11650,8 +11593,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 254 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v24, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v24 ; CHECK-RV64-NEXT: .LBB61_263: # %else1014 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -11916,9 +11859,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3 ; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: .LBB61_327: # %else1266 -; CHECK-RV64-NEXT: slli a1, a2, 1 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 5 +; CHECK-RV64-NEXT: slli a1, a2, 1 ; CHECK-RV64-NEXT: bgez a1, .LBB61_329 ; CHECK-RV64-NEXT: # %bb.328: # %cond.load1269 ; CHECK-RV64-NEXT: lbu a1, 0(a0) @@ -12191,9 +12134,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3 ; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: .LBB61_393: # 
%else1522 -; CHECK-RV64-NEXT: slli a2, a1, 1 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 6 +; CHECK-RV64-NEXT: slli a2, a1, 1 ; CHECK-RV64-NEXT: bgez a2, .LBB61_395 ; CHECK-RV64-NEXT: # %bb.394: # %cond.load1525 ; CHECK-RV64-NEXT: lbu a2, 0(a0) @@ -12466,9 +12409,9 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vslideup.vx v8, v16, a3 ; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: .LBB61_459: # %else1778 -; CHECK-RV64-NEXT: slli a1, a2, 1 ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-RV64-NEXT: vslidedown.vi v16, v0, 7 +; CHECK-RV64-NEXT: slli a1, a2, 1 ; CHECK-RV64-NEXT: bgez a1, .LBB61_461 ; CHECK-RV64-NEXT: # %bb.460: # %cond.load1781 ; CHECK-RV64-NEXT: lbu a1, 0(a0) @@ -12745,8 +12688,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vsetvli zero, zero, e8, mf8, tu, ma ; CHECK-RV64-NEXT: vmv.s.x v8, a1 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 2 ; CHECK-RV64-NEXT: bnez a1, .LBB61_528 @@ -12757,8 +12700,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 1 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 4 ; CHECK-RV64-NEXT: bnez a1, .LBB61_529 @@ -12769,8 +12712,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 2 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 8 ; CHECK-RV64-NEXT: bnez a1, .LBB61_530 @@ -12781,8 +12724,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 16 ; CHECK-RV64-NEXT: bnez a1, .LBB61_531 @@ -12793,8 +12736,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 4 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 32 ; CHECK-RV64-NEXT: bnez a1, .LBB61_532 @@ -12805,8 +12748,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 5 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 64 ; CHECK-RV64-NEXT: bnez a1, .LBB61_533 @@ -12817,8 +12760,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: 
vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 6 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 128 ; CHECK-RV64-NEXT: bnez a1, .LBB61_534 @@ -12829,8 +12772,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 7 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 256 ; CHECK-RV64-NEXT: bnez a1, .LBB61_535 @@ -12841,8 +12784,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 8 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 512 ; CHECK-RV64-NEXT: bnez a1, .LBB61_536 @@ -12853,8 +12796,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 9 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 1024 ; CHECK-RV64-NEXT: bnez a1, .LBB61_537 @@ -12865,8 +12808,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 10 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 52 ; CHECK-RV64-NEXT: bltz a1, .LBB61_538 @@ -12877,8 +12820,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 11 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 51 ; CHECK-RV64-NEXT: bltz a1, .LBB61_539 @@ -12889,8 +12832,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 12 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 50 ; CHECK-RV64-NEXT: bltz a1, .LBB61_540 @@ -12901,8 +12844,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 13 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 49 ; CHECK-RV64-NEXT: bltz a1, .LBB61_541 @@ -12913,8 +12856,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 14 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: 
vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 48 ; CHECK-RV64-NEXT: bltz a1, .LBB61_542 @@ -12925,8 +12868,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 15 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 47 ; CHECK-RV64-NEXT: bltz a1, .LBB61_543 @@ -12937,8 +12880,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 16 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 46 ; CHECK-RV64-NEXT: bltz a1, .LBB61_544 @@ -12949,8 +12892,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 17 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 45 ; CHECK-RV64-NEXT: bltz a1, .LBB61_545 @@ -12961,8 +12904,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 18 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 44 ; CHECK-RV64-NEXT: bltz a1, .LBB61_546 @@ -12973,8 +12916,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 19 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 43 ; CHECK-RV64-NEXT: bltz a1, .LBB61_547 @@ -12985,8 +12928,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 20 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 42 ; CHECK-RV64-NEXT: bltz a1, .LBB61_548 @@ -12997,8 +12940,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 21 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 41 ; CHECK-RV64-NEXT: bltz a1, .LBB61_549 @@ -13009,8 +12952,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 22 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, 
a2, 40 ; CHECK-RV64-NEXT: bltz a1, .LBB61_550 @@ -13021,8 +12964,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 23 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 39 ; CHECK-RV64-NEXT: bltz a1, .LBB61_551 @@ -13033,8 +12976,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 24 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 38 ; CHECK-RV64-NEXT: bltz a1, .LBB61_552 @@ -13045,8 +12988,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 25 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 37 ; CHECK-RV64-NEXT: bltz a1, .LBB61_553 @@ -13057,8 +13000,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 26 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 36 ; CHECK-RV64-NEXT: bltz a1, .LBB61_554 @@ -13069,8 +13012,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 27 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 35 ; CHECK-RV64-NEXT: bltz a1, .LBB61_555 @@ -13081,8 +13024,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 28 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 34 ; CHECK-RV64-NEXT: bltz a1, .LBB61_556 @@ -13093,8 +13036,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 29 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 33 ; CHECK-RV64-NEXT: bltz a1, .LBB61_557 @@ -13105,8 +13048,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: vmv8r.v v16, v8 ; CHECK-RV64-NEXT: vmv.s.x v9, a1 ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 30 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 32 ; CHECK-RV64-NEXT: bltz a1, .LBB61_558 @@ -13119,8 +13062,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a1, 32 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vi v8, v9, 31 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 31 ; CHECK-RV64-NEXT: bltz a1, .LBB61_559 @@ -13134,8 +13077,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 32 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 30 ; CHECK-RV64-NEXT: bltz a1, .LBB61_560 @@ -13149,8 +13092,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 33 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 29 ; CHECK-RV64-NEXT: bltz a1, .LBB61_561 @@ -13164,8 +13107,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 34 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 28 ; CHECK-RV64-NEXT: bltz a1, .LBB61_562 @@ -13179,8 +13122,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 35 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 27 ; CHECK-RV64-NEXT: bltz a1, .LBB61_563 @@ -13194,8 +13137,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 36 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 26 ; CHECK-RV64-NEXT: bltz a1, .LBB61_564 @@ -13209,8 +13152,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 37 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 25 ; CHECK-RV64-NEXT: bltz a1, .LBB61_565 @@ -13224,8 +13167,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 38 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 24 ; CHECK-RV64-NEXT: bltz a1, .LBB61_566 @@ -13239,8 +13182,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 39 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 23 ; CHECK-RV64-NEXT: bltz a1, .LBB61_567 @@ -13254,8 +13197,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 40 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 22 ; CHECK-RV64-NEXT: bltz a1, .LBB61_568 @@ -13269,8 +13212,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 41 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 21 ; CHECK-RV64-NEXT: bltz a1, .LBB61_569 @@ -13284,8 +13227,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 42 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 20 ; CHECK-RV64-NEXT: bltz a1, .LBB61_570 @@ -13299,8 +13242,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 43 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 19 ; CHECK-RV64-NEXT: bltz a1, .LBB61_571 @@ -13314,8 +13257,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 44 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 18 ; CHECK-RV64-NEXT: bltz a1, .LBB61_572 @@ -13329,8 +13272,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 45 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 17 ; CHECK-RV64-NEXT: bltz a1, .LBB61_573 @@ -13344,8 +13287,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 46 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 16 ; CHECK-RV64-NEXT: bltz a1, .LBB61_574 @@ -13359,8 +13302,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 47 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 15 ; CHECK-RV64-NEXT: bltz a1, .LBB61_575 @@ -13374,8 +13317,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 48 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 14 ; CHECK-RV64-NEXT: bltz a1, .LBB61_576 @@ -13389,8 +13332,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 49 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 13 ; CHECK-RV64-NEXT: bltz a1, .LBB61_577 @@ -13404,8 +13347,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 50 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 12 ; CHECK-RV64-NEXT: bltz a1, .LBB61_578 @@ -13419,8 +13362,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 51 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 11 ; CHECK-RV64-NEXT: bltz a1, .LBB61_579 @@ -13434,8 +13377,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 52 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 10 ; CHECK-RV64-NEXT: bltz a1, .LBB61_580 @@ -13449,8 +13392,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 53 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 9 ; CHECK-RV64-NEXT: bltz a1, .LBB61_581 @@ -13464,8 +13407,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 54 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 8 ; CHECK-RV64-NEXT: bltz a1, .LBB61_582 @@ -13479,8 +13422,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 55 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 7 ; CHECK-RV64-NEXT: bltz a1, .LBB61_583 @@ -13494,8 +13437,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 56 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 6 ; CHECK-RV64-NEXT: bltz a1, .LBB61_584 @@ -13509,8 +13452,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 57 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 5 ; CHECK-RV64-NEXT: bltz a1, .LBB61_585 @@ -13524,8 +13467,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 58 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 4 ; CHECK-RV64-NEXT: bltz a1, .LBB61_586 @@ -13539,8 +13482,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 59 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 3 ; CHECK-RV64-NEXT: bltz a1, .LBB61_587 @@ -13554,8 +13497,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 60 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 2 ; CHECK-RV64-NEXT: bgez a1, .LBB61_1025 @@ -13570,8 +13513,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 63 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m1, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v9, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv1r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 1 ; CHECK-RV64-NEXT: bnez a2, .LBB61_589 @@ -13585,8 +13528,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 64 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 2 ; CHECK-RV64-NEXT: bnez a2, .LBB61_590 @@ -13600,8 +13543,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 65 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 4 ; CHECK-RV64-NEXT: bnez a2, .LBB61_591 @@ -13615,8 +13558,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 66 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 8 ; CHECK-RV64-NEXT: bnez a2, .LBB61_592 @@ -13630,8 +13573,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 67 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 16 ; CHECK-RV64-NEXT: bnez a2, .LBB61_593 @@ -13645,8 +13588,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 68 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 32 ; CHECK-RV64-NEXT: bnez a2, .LBB61_594 @@ -13660,8 +13603,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 69 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 64 ; CHECK-RV64-NEXT: bnez a2, .LBB61_595 @@ -13675,8 +13618,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 70 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 128 ; CHECK-RV64-NEXT: bnez a2, .LBB61_596 @@ -13690,8 +13633,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 71 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 256 ; CHECK-RV64-NEXT: bnez a2, .LBB61_597 @@ -13705,8 +13648,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 72 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 512 ; CHECK-RV64-NEXT: bnez a2, .LBB61_598 @@ -13720,8 +13663,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 73 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 1024 ; CHECK-RV64-NEXT: bnez a2, .LBB61_599 @@ -13735,8 +13678,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 74 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 52 ; CHECK-RV64-NEXT: bltz a2, .LBB61_600 @@ -13750,8 +13693,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 75 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 51 ; CHECK-RV64-NEXT: bltz a2, .LBB61_601 @@ -13765,8 +13708,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 76 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 50 ; CHECK-RV64-NEXT: bltz a2, .LBB61_602 @@ -13780,8 +13723,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 77 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 49 ; CHECK-RV64-NEXT: bltz a2, .LBB61_603 @@ -13795,8 +13738,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 78 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 48 ; CHECK-RV64-NEXT: bltz a2, .LBB61_604 @@ -13810,8 +13753,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 79 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 47 ; CHECK-RV64-NEXT: bltz a2, .LBB61_605 @@ -13825,8 +13768,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 80 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 46 ; CHECK-RV64-NEXT: bltz a2, .LBB61_606 @@ -13840,8 +13783,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 81 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 45 ; CHECK-RV64-NEXT: bltz a2, .LBB61_607 @@ -13855,8 +13798,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 82 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 44 ; CHECK-RV64-NEXT: bltz a2, .LBB61_608 @@ -13870,8 +13813,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 83 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 43 ; CHECK-RV64-NEXT: bltz a2, .LBB61_609 @@ -13885,8 +13828,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 84 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 42 ; CHECK-RV64-NEXT: bltz a2, .LBB61_610 @@ -13900,8 +13843,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 85 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 41 ; CHECK-RV64-NEXT: bltz a2, .LBB61_611 @@ -13915,8 +13858,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 86 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 40 ; CHECK-RV64-NEXT: bltz a2, .LBB61_612 @@ -13930,8 +13873,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 87 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 39 ; CHECK-RV64-NEXT: bltz a2, .LBB61_613 @@ -13945,8 +13888,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 88 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 38 ; CHECK-RV64-NEXT: bltz a2, .LBB61_614 @@ -13960,8 +13903,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 89 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 37 ; CHECK-RV64-NEXT: bltz a2, .LBB61_615 @@ -13975,8 +13918,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 90 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 36 ; CHECK-RV64-NEXT: bltz a2, .LBB61_616 @@ -13990,8 +13933,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 91 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 35 ; CHECK-RV64-NEXT: bltz a2, .LBB61_617 @@ -14005,8 +13948,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 92 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 34 ; CHECK-RV64-NEXT: bltz a2, .LBB61_618 @@ -14020,8 +13963,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 93 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 33 ; CHECK-RV64-NEXT: bltz a2, .LBB61_619 @@ -14035,8 +13978,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 94 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 32 ; CHECK-RV64-NEXT: bltz a2, .LBB61_620 @@ -14050,8 +13993,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 95 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 31 ; CHECK-RV64-NEXT: bltz a2, .LBB61_621 @@ -14065,8 +14008,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 96 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 30 ; CHECK-RV64-NEXT: bltz a2, .LBB61_622 @@ -14080,8 +14023,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 97 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 29 ; CHECK-RV64-NEXT: bltz a2, .LBB61_623 @@ -14095,8 +14038,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 98 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 28 ; CHECK-RV64-NEXT: bltz a2, .LBB61_624 @@ -14110,8 +14053,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 99 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 27 ; CHECK-RV64-NEXT: bltz a2, .LBB61_625 @@ -14125,8 +14068,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 100 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 26 ; CHECK-RV64-NEXT: bltz a2, .LBB61_626 @@ -14140,8 +14083,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 101 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 25 ; CHECK-RV64-NEXT: bltz a2, .LBB61_627 @@ -14155,8 +14098,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 102 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 24 ; CHECK-RV64-NEXT: bltz a2, .LBB61_628 @@ -14170,8 +14113,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 103 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 23 ; CHECK-RV64-NEXT: bltz a2, .LBB61_629 @@ -14185,8 +14128,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 104 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 22 ; CHECK-RV64-NEXT: bltz a2, .LBB61_630 @@ -14200,8 +14143,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 105 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 21 ; CHECK-RV64-NEXT: bltz a2, .LBB61_631 @@ -14215,8 +14158,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 106 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 20 ; CHECK-RV64-NEXT: bltz a2, .LBB61_632 @@ -14230,8 +14173,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 107 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 19 ; CHECK-RV64-NEXT: bltz a2, .LBB61_633 @@ -14245,8 +14188,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 108 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 18 ; CHECK-RV64-NEXT: bltz a2, .LBB61_634 @@ -14260,8 +14203,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 109 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 17 ; CHECK-RV64-NEXT: bltz a2, .LBB61_635 @@ -14275,8 +14218,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 110 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 16 ; CHECK-RV64-NEXT: bltz a2, .LBB61_636 @@ -14290,8 +14233,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 111 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 15 ; CHECK-RV64-NEXT: bltz a2, .LBB61_637 @@ -14305,8 +14248,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 112 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 14 ; CHECK-RV64-NEXT: bltz a2, .LBB61_638 @@ -14320,8 +14263,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 113 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 13 ; CHECK-RV64-NEXT: bltz a2, .LBB61_639 @@ -14335,8 +14278,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 114 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 12 ; CHECK-RV64-NEXT: bltz a2, .LBB61_640 @@ -14350,8 +14293,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 115 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 11 ; CHECK-RV64-NEXT: bltz a2, .LBB61_641 @@ -14365,8 +14308,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 116 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 10 ; CHECK-RV64-NEXT: bltz a2, .LBB61_642 @@ -14380,8 +14323,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 117 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 9 ; CHECK-RV64-NEXT: bltz a2, .LBB61_643 @@ -14395,8 +14338,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 118 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 8 ; CHECK-RV64-NEXT: bltz a2, .LBB61_644 @@ -14410,8 +14353,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 119 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 7 ; CHECK-RV64-NEXT: bltz a2, .LBB61_645 @@ -14425,8 +14368,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 120 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 6 ; CHECK-RV64-NEXT: bltz a2, .LBB61_646 @@ -14440,8 +14383,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 121 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 5 ; CHECK-RV64-NEXT: bltz a2, .LBB61_647 @@ -14455,8 +14398,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 122 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 4 ; CHECK-RV64-NEXT: bltz a2, .LBB61_648 @@ -14470,8 +14413,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 123 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 3 ; CHECK-RV64-NEXT: bltz a2, .LBB61_649 @@ -14485,8 +14428,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 124 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 2 ; CHECK-RV64-NEXT: bgez a2, .LBB61_1026 @@ -14501,8 +14444,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 127 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m2, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v10, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv2r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 1 ; CHECK-RV64-NEXT: bnez a1, .LBB61_651 @@ -14516,8 +14459,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 128 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 2 ; CHECK-RV64-NEXT: bnez a1, .LBB61_652 @@ -14531,8 +14474,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 129 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 4 ; CHECK-RV64-NEXT: bnez a1, .LBB61_653 @@ -14546,8 +14489,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 130 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 8 ; CHECK-RV64-NEXT: bnez a1, .LBB61_654 @@ -14561,8 +14504,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 131 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 16 ; CHECK-RV64-NEXT: bnez a1, .LBB61_655 @@ -14576,8 +14519,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 132 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 32 ; CHECK-RV64-NEXT: bnez a1, .LBB61_656 @@ -14591,8 +14534,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 133 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 64 ; CHECK-RV64-NEXT: bnez a1, .LBB61_657 @@ -14606,8 +14549,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 134 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 128 ; CHECK-RV64-NEXT: bnez a1, .LBB61_658 @@ -14621,8 +14564,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 135 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 256 ; CHECK-RV64-NEXT: bnez a1, .LBB61_659 @@ -14636,8 +14579,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 136 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 512 ; CHECK-RV64-NEXT: bnez a1, .LBB61_660 @@ -14651,8 +14594,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 137 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 1024 ; CHECK-RV64-NEXT: bnez a1, .LBB61_661 @@ -14666,8 +14609,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 138 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 52 ; CHECK-RV64-NEXT: bltz a1, .LBB61_662 @@ -14681,8 +14624,8 @@ define <512 x 
i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 139 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 51 ; CHECK-RV64-NEXT: bltz a1, .LBB61_663 @@ -14696,8 +14639,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 140 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 50 ; CHECK-RV64-NEXT: bltz a1, .LBB61_664 @@ -14711,8 +14654,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 141 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 49 ; CHECK-RV64-NEXT: bltz a1, .LBB61_665 @@ -14726,8 +14669,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 142 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 48 ; CHECK-RV64-NEXT: bltz a1, .LBB61_666 @@ -14741,8 +14684,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 143 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 47 ; CHECK-RV64-NEXT: bltz a1, .LBB61_667 @@ -14756,8 +14699,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 144 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 46 ; CHECK-RV64-NEXT: bltz a1, .LBB61_668 @@ -14771,8 +14714,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 145 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 45 ; CHECK-RV64-NEXT: bltz a1, .LBB61_669 @@ -14786,8 +14729,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 146 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 44 ; CHECK-RV64-NEXT: bltz a1, .LBB61_670 @@ -14801,8 +14744,8 @@ define <512 x 
i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 147 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 43 ; CHECK-RV64-NEXT: bltz a1, .LBB61_671 @@ -14816,8 +14759,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 148 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 42 ; CHECK-RV64-NEXT: bltz a1, .LBB61_672 @@ -14831,8 +14774,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 149 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 41 ; CHECK-RV64-NEXT: bltz a1, .LBB61_673 @@ -14846,8 +14789,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 150 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 40 ; CHECK-RV64-NEXT: bltz a1, .LBB61_674 @@ -14861,8 +14804,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 151 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 39 ; CHECK-RV64-NEXT: bltz a1, .LBB61_675 @@ -14876,8 +14819,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 152 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 38 ; CHECK-RV64-NEXT: bltz a1, .LBB61_676 @@ -14891,8 +14834,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 153 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 37 ; CHECK-RV64-NEXT: bltz a1, .LBB61_677 @@ -14906,8 +14849,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 154 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 36 ; CHECK-RV64-NEXT: bltz a1, .LBB61_678 @@ -14921,8 +14864,8 @@ define <512 x 
i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 155 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 35 ; CHECK-RV64-NEXT: bltz a1, .LBB61_679 @@ -14936,8 +14879,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 156 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 34 ; CHECK-RV64-NEXT: bltz a1, .LBB61_680 @@ -14951,8 +14894,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 157 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 33 ; CHECK-RV64-NEXT: bltz a1, .LBB61_681 @@ -14966,8 +14909,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 158 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 32 ; CHECK-RV64-NEXT: bltz a1, .LBB61_682 @@ -14981,8 +14924,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 159 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 31 ; CHECK-RV64-NEXT: bltz a1, .LBB61_683 @@ -14996,8 +14939,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 160 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 30 ; CHECK-RV64-NEXT: bltz a1, .LBB61_684 @@ -15011,8 +14954,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 161 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 29 ; CHECK-RV64-NEXT: bltz a1, .LBB61_685 @@ -15026,8 +14969,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 162 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 28 ; CHECK-RV64-NEXT: bltz a1, .LBB61_686 @@ -15041,8 +14984,8 @@ define <512 x 
i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 163 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 27 ; CHECK-RV64-NEXT: bltz a1, .LBB61_687 @@ -15056,8 +14999,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 164 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 26 ; CHECK-RV64-NEXT: bltz a1, .LBB61_688 @@ -15071,8 +15014,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 165 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 25 ; CHECK-RV64-NEXT: bltz a1, .LBB61_689 @@ -15086,8 +15029,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 166 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 24 ; CHECK-RV64-NEXT: bltz a1, .LBB61_690 @@ -15101,8 +15044,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 167 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 23 ; CHECK-RV64-NEXT: bltz a1, .LBB61_691 @@ -15116,8 +15059,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 168 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 22 ; CHECK-RV64-NEXT: bltz a1, .LBB61_692 @@ -15131,8 +15074,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 169 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 21 ; CHECK-RV64-NEXT: bltz a1, .LBB61_693 @@ -15146,8 +15089,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 170 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 20 ; CHECK-RV64-NEXT: bltz a1, .LBB61_694 @@ -15161,8 +15104,8 @@ define <512 x 
i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 171 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 19 ; CHECK-RV64-NEXT: bltz a1, .LBB61_695 @@ -15176,8 +15119,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 172 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 18 ; CHECK-RV64-NEXT: bltz a1, .LBB61_696 @@ -15191,8 +15134,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 173 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 17 ; CHECK-RV64-NEXT: bltz a1, .LBB61_697 @@ -15206,8 +15149,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 174 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 16 ; CHECK-RV64-NEXT: bltz a1, .LBB61_698 @@ -15221,8 +15164,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 175 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 15 ; CHECK-RV64-NEXT: bltz a1, .LBB61_699 @@ -15236,8 +15179,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 176 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 14 ; CHECK-RV64-NEXT: bltz a1, .LBB61_700 @@ -15251,8 +15194,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 177 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 13 ; CHECK-RV64-NEXT: bltz a1, .LBB61_701 @@ -15266,8 +15209,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 178 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 12 ; CHECK-RV64-NEXT: bltz a1, .LBB61_702 @@ -15281,8 +15224,8 @@ define <512 x 
i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 179 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 11 ; CHECK-RV64-NEXT: bltz a1, .LBB61_703 @@ -15296,8 +15239,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 180 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 10 ; CHECK-RV64-NEXT: bltz a1, .LBB61_704 @@ -15311,8 +15254,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 181 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 9 ; CHECK-RV64-NEXT: bltz a1, .LBB61_705 @@ -15326,8 +15269,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 182 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 8 ; CHECK-RV64-NEXT: bltz a1, .LBB61_706 @@ -15341,8 +15284,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 183 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 7 ; CHECK-RV64-NEXT: bltz a1, .LBB61_707 @@ -15356,8 +15299,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 184 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 6 ; CHECK-RV64-NEXT: bltz a1, .LBB61_708 @@ -15371,8 +15314,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 185 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 5 ; CHECK-RV64-NEXT: bltz a1, .LBB61_709 @@ -15386,8 +15329,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 186 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 4 ; CHECK-RV64-NEXT: bltz a1, .LBB61_710 @@ -15401,8 +15344,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 187 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 3 ; CHECK-RV64-NEXT: bltz a1, .LBB61_711 @@ -15416,8 +15359,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 188 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a1, a2, 2 ; CHECK-RV64-NEXT: bgez a1, .LBB61_1027 @@ -15432,8 +15375,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 191 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 1 ; CHECK-RV64-NEXT: bnez a2, .LBB61_713 @@ -15447,8 +15390,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 192 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 2 ; CHECK-RV64-NEXT: bnez a2, .LBB61_714 @@ -15462,8 +15405,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 193 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 4 ; CHECK-RV64-NEXT: bnez a2, .LBB61_715 @@ -15477,8 +15420,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 194 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 8 ; CHECK-RV64-NEXT: bnez a2, .LBB61_716 @@ -15492,8 +15435,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 195 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 16 ; CHECK-RV64-NEXT: bnez a2, .LBB61_717 @@ -15507,8 +15450,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 196 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 32 ; CHECK-RV64-NEXT: bnez a2, .LBB61_718 @@ -15522,8 +15465,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 197 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 64 ; CHECK-RV64-NEXT: bnez a2, .LBB61_719 @@ -15537,8 +15480,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 198 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 128 ; CHECK-RV64-NEXT: bnez a2, .LBB61_720 @@ -15552,8 +15495,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 199 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 256 ; CHECK-RV64-NEXT: bnez a2, .LBB61_721 @@ -15567,8 +15510,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 200 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 512 ; CHECK-RV64-NEXT: bnez a2, .LBB61_722 @@ -15582,8 +15525,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 201 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a2, a1, 1024 ; CHECK-RV64-NEXT: bnez a2, .LBB61_723 @@ -15597,8 +15540,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 202 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 52 ; CHECK-RV64-NEXT: bltz a2, .LBB61_724 @@ -15612,8 +15555,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 203 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 51 ; CHECK-RV64-NEXT: bltz a2, .LBB61_725 @@ -15627,8 +15570,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 204 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 50 ; CHECK-RV64-NEXT: bltz a2, .LBB61_726 @@ -15642,8 +15585,8 @@ define <512 x 
i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 205 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 49 ; CHECK-RV64-NEXT: bltz a2, .LBB61_727 @@ -15657,8 +15600,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 206 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 48 ; CHECK-RV64-NEXT: bltz a2, .LBB61_728 @@ -15672,8 +15615,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 207 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 47 ; CHECK-RV64-NEXT: bltz a2, .LBB61_729 @@ -15687,8 +15630,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 208 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 46 ; CHECK-RV64-NEXT: bltz a2, .LBB61_730 @@ -15702,8 +15645,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 209 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 45 ; CHECK-RV64-NEXT: bltz a2, .LBB61_731 @@ -15717,8 +15660,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 210 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 44 ; CHECK-RV64-NEXT: bltz a2, .LBB61_732 @@ -15732,8 +15675,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 211 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 43 ; CHECK-RV64-NEXT: bltz a2, .LBB61_733 @@ -15747,8 +15690,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 212 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 42 ; CHECK-RV64-NEXT: bltz a2, .LBB61_734 @@ -15762,8 +15705,8 @@ define <512 x 
i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 213 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 41 ; CHECK-RV64-NEXT: bltz a2, .LBB61_735 @@ -15777,8 +15720,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 214 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 40 ; CHECK-RV64-NEXT: bltz a2, .LBB61_736 @@ -15792,8 +15735,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 215 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 39 ; CHECK-RV64-NEXT: bltz a2, .LBB61_737 @@ -15807,8 +15750,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 216 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 38 ; CHECK-RV64-NEXT: bltz a2, .LBB61_738 @@ -15822,8 +15765,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 217 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 37 ; CHECK-RV64-NEXT: bltz a2, .LBB61_739 @@ -15837,8 +15780,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 218 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 36 ; CHECK-RV64-NEXT: bltz a2, .LBB61_740 @@ -15852,8 +15795,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 219 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 35 ; CHECK-RV64-NEXT: bltz a2, .LBB61_741 @@ -15867,8 +15810,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 220 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 34 ; CHECK-RV64-NEXT: bltz a2, .LBB61_742 @@ -15882,8 +15825,8 @@ define <512 x 
i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 221 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 33 ; CHECK-RV64-NEXT: bltz a2, .LBB61_743 @@ -15897,8 +15840,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 222 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 32 ; CHECK-RV64-NEXT: bltz a2, .LBB61_744 @@ -15912,8 +15855,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 223 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 31 ; CHECK-RV64-NEXT: bltz a2, .LBB61_745 @@ -15927,8 +15870,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 224 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 30 ; CHECK-RV64-NEXT: bltz a2, .LBB61_746 @@ -15942,8 +15885,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 225 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 29 ; CHECK-RV64-NEXT: bltz a2, .LBB61_747 @@ -15957,8 +15900,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 226 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 28 ; CHECK-RV64-NEXT: bltz a2, .LBB61_748 @@ -15972,8 +15915,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 227 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 27 ; CHECK-RV64-NEXT: bltz a2, .LBB61_749 @@ -15987,8 +15930,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 228 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 26 ; CHECK-RV64-NEXT: bltz a2, .LBB61_750 @@ -16002,8 +15945,8 @@ define <512 x 
i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 229 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 25 ; CHECK-RV64-NEXT: bltz a2, .LBB61_751 @@ -16017,8 +15960,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 230 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 24 ; CHECK-RV64-NEXT: bltz a2, .LBB61_752 @@ -16032,8 +15975,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 231 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 23 ; CHECK-RV64-NEXT: bltz a2, .LBB61_753 @@ -16047,8 +15990,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 232 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 22 ; CHECK-RV64-NEXT: bltz a2, .LBB61_754 @@ -16062,8 +16005,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 233 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 21 ; CHECK-RV64-NEXT: bltz a2, .LBB61_755 @@ -16077,8 +16020,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 234 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 20 ; CHECK-RV64-NEXT: bltz a2, .LBB61_756 @@ -16092,8 +16035,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 235 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 19 ; CHECK-RV64-NEXT: bltz a2, .LBB61_757 @@ -16107,8 +16050,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 236 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 18 ; CHECK-RV64-NEXT: bltz a2, .LBB61_758 @@ -16122,8 +16065,8 @@ define <512 x 
i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 237 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 17 ; CHECK-RV64-NEXT: bltz a2, .LBB61_759 @@ -16137,8 +16080,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 238 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 16 ; CHECK-RV64-NEXT: bltz a2, .LBB61_760 @@ -16152,8 +16095,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 239 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 15 ; CHECK-RV64-NEXT: bltz a2, .LBB61_761 @@ -16167,8 +16110,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 240 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 14 ; CHECK-RV64-NEXT: bltz a2, .LBB61_762 @@ -16182,8 +16125,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 241 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 13 ; CHECK-RV64-NEXT: bltz a2, .LBB61_763 @@ -16197,8 +16140,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 242 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 12 ; CHECK-RV64-NEXT: bltz a2, .LBB61_764 @@ -16212,8 +16155,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 243 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 11 ; CHECK-RV64-NEXT: bltz a2, .LBB61_765 @@ -16227,8 +16170,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 244 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 10 ; CHECK-RV64-NEXT: bltz a2, .LBB61_766 @@ -16242,8 +16185,8 @@ define <512 x 
i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 245 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 9 ; CHECK-RV64-NEXT: bltz a2, .LBB61_767 @@ -16257,8 +16200,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 246 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 8 ; CHECK-RV64-NEXT: bltz a2, .LBB61_768 @@ -16272,8 +16215,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 247 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 7 ; CHECK-RV64-NEXT: bltz a2, .LBB61_769 @@ -16287,8 +16230,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 248 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 6 ; CHECK-RV64-NEXT: bltz a2, .LBB61_770 @@ -16302,8 +16245,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 249 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 5 ; CHECK-RV64-NEXT: bltz a2, .LBB61_771 @@ -16317,8 +16260,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 250 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 4 ; CHECK-RV64-NEXT: bltz a2, .LBB61_772 @@ -16332,8 +16275,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 251 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 3 ; CHECK-RV64-NEXT: bltz a2, .LBB61_773 @@ -16347,8 +16290,8 @@ define <512 x i8> @test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 252 ; CHECK-RV64-NEXT: vsetvli zero, a2, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: slli a2, a1, 2 ; CHECK-RV64-NEXT: bgez a2, .LBB61_1028 @@ -16363,8 +16306,8 @@ define <512 x i8> 
@test_expandload_v512i8_vlen512(ptr %base, <512 x i1> %mask, < ; CHECK-RV64-NEXT: li a3, 255 ; CHECK-RV64-NEXT: vsetvli zero, a1, e8, m4, tu, ma ; CHECK-RV64-NEXT: vslideup.vx v8, v12, a3 -; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv4r.v v16, v8 +; CHECK-RV64-NEXT: addi a0, a0, 1 ; CHECK-RV64-NEXT: vmv8r.v v8, v16 ; CHECK-RV64-NEXT: andi a1, a2, 1 ; CHECK-RV64-NEXT: bnez a1, .LBB61_775 diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll index d60ce408278da..2961b880bdceb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-fp.ll @@ -1330,14 +1330,14 @@ define double @extractelt_nxv16f64_neg1( %v) { ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: addi a0, sp, 64 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: li a1, -1 ; RV64-NEXT: vs8r.v v8, (a0) -; RV64-NEXT: slli a3, a2, 3 +; RV64-NEXT: slli a1, a2, 3 +; RV64-NEXT: add a1, a0, a1 +; RV64-NEXT: vs8r.v v16, (a1) +; RV64-NEXT: li a1, -1 ; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: slli a2, a2, 1 -; RV64-NEXT: add a3, a0, a3 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: vs8r.v v16, (a3) ; RV64-NEXT: bltu a2, a1, .LBB70_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a2, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll index 796f8dde58f47..4664a48a2d668 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-i1.ll @@ -7,9 +7,9 @@ define i1 @extractelt_nxv1i1(ptr %x, i64 %idx) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -24,9 +24,9 @@ define i1 @extractelt_nxv2i1(ptr %x, i64 %idx) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -41,9 +41,9 @@ define i1 @extractelt_nxv4i1(ptr %x, i64 %idx) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -58,9 +58,9 @@ define i1 @extractelt_nxv8i1(ptr %x, i64 %idx) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vl1r.v v8, (a0) ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -140,14 +140,14 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: sub sp, sp, a3 ; RV32-NEXT: andi sp, sp, -64 ; RV32-NEXT: addi a3, sp, 64 -; RV32-NEXT: vl8r.v v8, (a0) ; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a0, a0, a2 -; RV32-NEXT: vl8r.v v24, (a0) -; RV32-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV32-NEXT: vmseq.vi v0, 
v8, 0 +; RV32-NEXT: vl8r.v v8, (a0) +; RV32-NEXT: vsetvli a4, zero, e8, m8, ta, ma ; RV32-NEXT: vmv.v.i v16, 0 ; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a0, a0, a2 +; RV32-NEXT: vmseq.vi v0, v8, 0 +; RV32-NEXT: vl8r.v v24, (a0) ; RV32-NEXT: add a2, a3, a2 ; RV32-NEXT: vmseq.vi v8, v24, 0 ; RV32-NEXT: vmerge.vim v24, v16, 1, v0 @@ -180,14 +180,14 @@ define i1 @extractelt_nxv128i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: sub sp, sp, a3 ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: addi a3, sp, 64 -; RV64-NEXT: vl8r.v v8, (a0) ; RV64-NEXT: slli a2, a2, 3 -; RV64-NEXT: add a0, a0, a2 -; RV64-NEXT: vl8r.v v24, (a0) -; RV64-NEXT: vsetvli a0, zero, e8, m8, ta, ma -; RV64-NEXT: vmseq.vi v0, v8, 0 +; RV64-NEXT: vl8r.v v8, (a0) +; RV64-NEXT: vsetvli a4, zero, e8, m8, ta, ma ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: add a1, a3, a1 +; RV64-NEXT: add a0, a0, a2 +; RV64-NEXT: vmseq.vi v0, v8, 0 +; RV64-NEXT: vl8r.v v24, (a0) ; RV64-NEXT: add a2, a3, a2 ; RV64-NEXT: vmseq.vi v8, v24, 0 ; RV64-NEXT: vmerge.vim v24, v16, 1, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll index a9e129ef11a2c..1546276381021 100644 --- a/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll +++ b/llvm/test/CodeGen/RISCV/rvv/extractelt-int-rv64.ll @@ -863,14 +863,14 @@ define i64 @extractelt_nxv16i64_neg1( %v) { ; CHECK-NEXT: andi sp, sp, -64 ; CHECK-NEXT: addi a0, sp, 64 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a1, -1 ; CHECK-NEXT: vs8r.v v8, (a0) -; CHECK-NEXT: slli a3, a2, 3 +; CHECK-NEXT: slli a1, a2, 3 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: vs8r.v v16, (a1) +; CHECK-NEXT: li a1, -1 ; CHECK-NEXT: srli a1, a1, 32 ; CHECK-NEXT: slli a2, a2, 1 -; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 -; CHECK-NEXT: vs8r.v v16, (a3) ; CHECK-NEXT: bltu a2, a1, .LBB74_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll index 1626b362fed15..1263094f3ace0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-constrained-sdnode.ll @@ -10,11 +10,11 @@ define @ceil_nxv1f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -33,11 +33,11 @@ define @ceil_nxv2f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -56,11 +56,11 @@ define @ceil_nxv4f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; 
CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -79,11 +79,11 @@ define @ceil_nxv8f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -102,11 +102,11 @@ define @ceil_nxv16f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -125,11 +125,11 @@ define @ceil_nxv32f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -150,9 +150,9 @@ define @ceil_nxv1f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -173,9 +173,9 @@ define @ceil_nxv2f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -196,9 +196,9 @@ define @ceil_nxv4f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -219,9 +219,9 @@ define @ceil_nxv8f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -242,9 +242,9 @@ define @ceil_nxv16f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 
3 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -263,11 +263,11 @@ define @ceil_nxv1f64( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -286,11 +286,11 @@ define @ceil_nxv2f64( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -309,11 +309,11 @@ define @ceil_nxv4f64( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -332,11 +332,11 @@ define @ceil_nxv8f64( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll index 4aca2d694dfbb..e8a787f7b615e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fceil-sdnode.ll @@ -18,11 +18,11 @@ define @ceil_nxv1bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -41,11 +41,11 @@ define @ceil_nxv2bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; 
CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -64,11 +64,11 @@ define @ceil_nxv4bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -87,11 +87,11 @@ define @ceil_nxv8bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -110,11 +110,11 @@ define @ceil_nxv16bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -133,11 +133,11 @@ define @ceil_nxv32bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -167,12 +167,12 @@ define @ceil_nxv32bf16( %x) { define @ceil_nxv1f16( %x) { ; ZVFH-LABEL: ceil_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -185,11 +185,11 @@ define @ceil_nxv1f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -206,12 +206,12 @@ declare @llvm.ceil.nxv1f16() define @ceil_nxv2f16( %x) { ; ZVFH-LABEL: ceil_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; 
ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -224,11 +224,11 @@ define @ceil_nxv2f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -245,12 +245,12 @@ declare @llvm.ceil.nxv2f16() define @ceil_nxv4f16( %x) { ; ZVFH-LABEL: ceil_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -263,11 +263,11 @@ define @ceil_nxv4f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -284,12 +284,12 @@ declare @llvm.ceil.nxv4f16() define @ceil_nxv8f16( %x) { ; ZVFH-LABEL: ceil_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -302,11 +302,11 @@ define @ceil_nxv8f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -323,12 +323,12 @@ declare @llvm.ceil.nxv8f16() define @ceil_nxv16f16( %x) { ; ZVFH-LABEL: ceil_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -341,11 +341,11 @@ define @ceil_nxv16f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: 
vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -362,12 +362,12 @@ declare @llvm.ceil.nxv16f16() define @ceil_nxv32f16( %x) { ; ZVFH-LABEL: ceil_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -380,11 +380,11 @@ define @ceil_nxv32f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -419,8 +419,8 @@ define @ceil_nxv1f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -439,8 +439,8 @@ define @ceil_nxv2f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -459,8 +459,8 @@ define @ceil_nxv4f32( %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -479,8 +479,8 @@ define @ceil_nxv8f32( %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -499,8 +499,8 @@ define @ceil_nxv16f32( %x) { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -515,12 +515,12 @@ declare @llvm.ceil.nxv16f32() define @ceil_nxv1f64( %x) { ; CHECK-LABEL: ceil_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; 
CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -535,12 +535,12 @@ declare @llvm.ceil.nxv1f64() define @ceil_nxv2f64( %x) { ; CHECK-LABEL: ceil_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -555,12 +555,12 @@ declare @llvm.ceil.nxv2f64() define @ceil_nxv4f64( %x) { ; CHECK-LABEL: ceil_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -575,12 +575,12 @@ declare @llvm.ceil.nxv4f64() define @ceil_nxv8f64( %x) { ; CHECK-LABEL: ceil_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll index d93f15ec44053..c3d7a9b3e877c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-constrained-sdnode.ll @@ -10,11 +10,11 @@ define @floor_nxv1f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -33,11 +33,11 @@ define @floor_nxv2f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -56,11 +56,11 @@ define @floor_nxv4f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; 
CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -79,11 +79,11 @@ define @floor_nxv8f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -102,11 +102,11 @@ define @floor_nxv16f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -125,11 +125,11 @@ define @floor_nxv32f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -150,9 +150,9 @@ define @floor_nxv1f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -173,9 +173,9 @@ define @floor_nxv2f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -196,9 +196,9 @@ define @floor_nxv4f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -219,9 +219,9 @@ define @floor_nxv8f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -242,9 +242,9 @@ define @floor_nxv16f32( %x) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: 
vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -263,11 +263,11 @@ define @floor_nxv1f64( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -286,11 +286,11 @@ define @floor_nxv2f64( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -309,11 +309,11 @@ define @floor_nxv4f64( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -332,11 +332,11 @@ define @floor_nxv8f64( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll index 010d7786c8891..88cd31f77bbbc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/ffloor-sdnode.ll @@ -18,11 +18,11 @@ define @floor_nxv1bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -42,11 +42,11 @@ define @floor_nxv2bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -66,11 +66,11 @@ define @floor_nxv4bf16( %x) { ; CHECK-NEXT: 
vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -90,11 +90,11 @@ define @floor_nxv8bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -114,11 +114,11 @@ define @floor_nxv16bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -138,11 +138,11 @@ define @floor_nxv32bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -173,12 +173,12 @@ declare @llvm.floor.nxv32bf16() define @floor_nxv1f16( %x) { ; ZVFH-LABEL: floor_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -191,11 +191,11 @@ define @floor_nxv1f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -212,12 +212,12 @@ declare @llvm.floor.nxv1f16() define @floor_nxv2f16( %x) { ; ZVFH-LABEL: floor_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, 
v0.t @@ -230,11 +230,11 @@ define @floor_nxv2f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -251,12 +251,12 @@ declare @llvm.floor.nxv2f16() define @floor_nxv4f16( %x) { ; ZVFH-LABEL: floor_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -269,11 +269,11 @@ define @floor_nxv4f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -290,12 +290,12 @@ declare @llvm.floor.nxv4f16() define @floor_nxv8f16( %x) { ; ZVFH-LABEL: floor_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -308,11 +308,11 @@ define @floor_nxv8f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -329,12 +329,12 @@ declare @llvm.floor.nxv8f16() define @floor_nxv16f16( %x) { ; ZVFH-LABEL: floor_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -347,11 +347,11 @@ define @floor_nxv16f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; 
ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -368,12 +368,12 @@ declare @llvm.floor.nxv16f16() define @floor_nxv32f16( %x) { ; ZVFH-LABEL: floor_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -386,11 +386,11 @@ define @floor_nxv32f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -425,8 +425,8 @@ define @floor_nxv1f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -445,8 +445,8 @@ define @floor_nxv2f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -465,8 +465,8 @@ define @floor_nxv4f32( %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -485,8 +485,8 @@ define @floor_nxv8f32( %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -505,8 +505,8 @@ define @floor_nxv16f32( %x) { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -521,12 +521,12 @@ declare @llvm.floor.nxv16f32() define @floor_nxv1f64( %x) { ; CHECK-LABEL: floor_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ 
-541,12 +541,12 @@ declare @llvm.floor.nxv1f64() define @floor_nxv2f64( %x) { ; CHECK-LABEL: floor_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -561,12 +561,12 @@ declare @llvm.floor.nxv2f64() define @floor_nxv4f64( %x) { ; CHECK-LABEL: floor_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -581,12 +581,12 @@ declare @llvm.floor.nxv4f64() define @floor_nxv8f64( %x) { ; CHECK-LABEL: floor_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll index 1752dfd50d0c5..2b973c9b80828 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vector-i8-index-cornercase.ll @@ -22,27 +22,27 @@ define <512 x i8> @single_source(<512 x i8> %a) { ; CHECK-NEXT: addi a1, sp, 512 ; CHECK-NEXT: vmv.x.s a2, v16 ; CHECK-NEXT: vslidedown.vi v24, v16, 5 -; CHECK-NEXT: li a3, 432 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vse8.v v8, (a1) +; CHECK-NEXT: li a0, 432 ; CHECK-NEXT: vmv.v.x v8, a2 -; CHECK-NEXT: lbu a0, 770(sp) +; CHECK-NEXT: lbu a1, 770(sp) +; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: li a1, 431 -; CHECK-NEXT: vslide1down.vx v8, v8, a0 -; CHECK-NEXT: lbu a0, 1012(sp) -; CHECK-NEXT: vsetvli zero, a3, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v24, a1 ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v16, 4 -; CHECK-NEXT: li a1, 466 -; CHECK-NEXT: vmv.s.x v16, a0 -; CHECK-NEXT: li a0, 465 +; CHECK-NEXT: vslidedown.vi v16, v16, 4 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma +; CHECK-NEXT: vslideup.vx v8, v24, a1 +; CHECK-NEXT: li a0, 466 +; CHECK-NEXT: lbu a1, 1012(sp) +; CHECK-NEXT: vmv.s.x v24, a1 +; CHECK-NEXT: li a1, 465 ; CHECK-NEXT: li a2, 501 -; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v24, a0 +; CHECK-NEXT: vsetvli zero, a0, e8, m8, tu, ma +; CHECK-NEXT: vslideup.vx v8, v16, a1 ; CHECK-NEXT: li a0, 500 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v16, a0 +; CHECK-NEXT: vslideup.vx v8, v24, a0 ; CHECK-NEXT: addi sp, s0, -1536 ; CHECK-NEXT: .cfi_def_cfa sp, 1536 ; CHECK-NEXT: ld ra, 1528(sp) # 8-byte Folded Reload @@ -103,12 
+103,7 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) { ; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: addi s0, sp, 1536 ; CHECK-NEXT: .cfi_def_cfa s0, 0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: sub sp, sp, a0 ; CHECK-NEXT: andi sp, sp, -512 -; CHECK-NEXT: addi a0, sp, 1520 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: li a0, 512 @@ -127,32 +122,30 @@ define <512 x i8> @two_source(<512 x i8> %a, <512 x i8> %b) { ; CHECK-NEXT: li a3, 465 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vse8.v v24, (a1) -; CHECK-NEXT: lbu a1, 985(sp) +; CHECK-NEXT: li a1, 478 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma ; CHECK-NEXT: vslideup.vx v8, v0, a3 -; CHECK-NEXT: li a2, 478 +; CHECK-NEXT: lbu a2, 985(sp) ; CHECK-NEXT: lbu a3, 1012(sp) -; CHECK-NEXT: vmv.s.x v24, a1 -; CHECK-NEXT: li a1, 477 -; CHECK-NEXT: vsetvli zero, a2, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v24, a1 +; CHECK-NEXT: vmv.s.x v24, a2 +; CHECK-NEXT: li a2, 477 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma +; CHECK-NEXT: vslideup.vx v8, v24, a2 ; CHECK-NEXT: li a1, 501 +; CHECK-NEXT: vmv.s.x v24, a3 +; CHECK-NEXT: li a2, 500 +; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma +; CHECK-NEXT: vslideup.vx v8, v24, a2 +; CHECK-NEXT: lui a1, %hi(.LCPI2_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI2_0) ; CHECK-NEXT: lui a2, %hi(.LCPI2_1) ; CHECK-NEXT: addi a2, a2, %lo(.LCPI2_1) +; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma +; CHECK-NEXT: vle8.v v24, (a1) ; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; CHECK-NEXT: vle64.v v0, (a2) -; CHECK-NEXT: li a2, 500 -; CHECK-NEXT: vmv.s.x v24, a3 -; CHECK-NEXT: lui a3, %hi(.LCPI2_0) -; CHECK-NEXT: addi a3, a3, %lo(.LCPI2_0) -; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v16, (a3) -; CHECK-NEXT: vsetvli zero, a1, e8, m8, tu, ma -; CHECK-NEXT: vslideup.vx v8, v24, a2 -; CHECK-NEXT: addi a1, sp, 1520 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, mu -; CHECK-NEXT: vrgather.vv v8, v24, v16, v0.t +; CHECK-NEXT: vrgather.vv v8, v16, v24, v0.t ; CHECK-NEXT: addi sp, s0, -1536 ; CHECK-NEXT: .cfi_def_cfa sp, 1536 ; CHECK-NEXT: ld ra, 1528(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll index 84da351de76ba..5f0088a47af24 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-abs-vp.ll @@ -417,9 +417,9 @@ declare <32 x i64> @llvm.vp.abs.v32i64(<32 x i64>, i1 immarg, <32 x i1>, i32) define <32 x i64> @vp_abs_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_abs_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB34_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll index 425422417ec78..753a90c22a366 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitcast-large-vector.ll @@ -9,10 +9,10 @@ define <512 x i8> @bitcast_1024B(<256 x i16> %a, <512 x i8> %b) { ; VLEN256-NEXT: addi a1, a0, 256 ; VLEN256-NEXT: li a2, 256 ; VLEN256-NEXT: 
vsetvli zero, a2, e8, m8, ta, ma -; VLEN256-NEXT: vle8.v v24, (a0) -; VLEN256-NEXT: vle8.v v0, (a1) -; VLEN256-NEXT: vadd.vv v8, v24, v8 -; VLEN256-NEXT: vadd.vv v16, v0, v16 +; VLEN256-NEXT: vle8.v v24, (a1) +; VLEN256-NEXT: vle8.v v0, (a0) +; VLEN256-NEXT: vadd.vv v8, v0, v8 +; VLEN256-NEXT: vadd.vv v16, v24, v16 ; VLEN256-NEXT: ret ; ; VLEN512-LABEL: bitcast_1024B: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll index 5ea4924468595..1ba173455a8f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll @@ -978,60 +978,60 @@ define <2 x i64> @vp_bitreverse_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 24 +; RV32-NEXT: vsrl.vi v10, v8, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsll.vx v10, v8, a2 +; RV32-NEXT: vsll.vx v11, v8, a2 ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vsrl.vx v11, v8, a2 -; RV32-NEXT: vsrl.vx v12, v8, a4 +; RV32-NEXT: vsrl.vx v12, v8, a2 +; RV32-NEXT: vsrl.vx v13, v8, a4 +; RV32-NEXT: vand.vx v9, v9, a5 +; RV32-NEXT: vand.vx v13, v13, a1 +; RV32-NEXT: vor.vv v12, v13, v12 ; RV32-NEXT: vand.vx v13, v8, a1 -; RV32-NEXT: vand.vx v12, v12, a1 -; RV32-NEXT: vor.vv v11, v12, v11 +; RV32-NEXT: vsll.vx v13, v13, a4 +; RV32-NEXT: vor.vv v11, v11, v13 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a6), zero +; RV32-NEXT: vlse64.v v13, (a6), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsll.vx v13, v13, a4 -; RV32-NEXT: vor.vv v10, v10, v13 -; RV32-NEXT: vsrl.vi v13, v8, 8 -; RV32-NEXT: vand.vx v9, v9, a5 -; RV32-NEXT: vand.vv v13, v13, v12 -; RV32-NEXT: vor.vv v9, v13, v9 +; RV32-NEXT: vand.vv v10, v10, v13 +; RV32-NEXT: vor.vv v9, v10, v9 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: lui a3, 349525 -; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: vand.vv v10, v8, v13 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: addi a1, a1, -241 ; RV32-NEXT: addi a2, a2, 819 ; RV32-NEXT: addi a3, a3, 1365 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vmv.v.x v10, a1 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vor.vv v9, v9, v12 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v11, a2 +; RV32-NEXT: vmv.v.x v12, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vor.vv v8, v10, v8 +; RV32-NEXT: vor.vv v8, v11, v8 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v10, a3 +; RV32-NEXT: vmv.v.x v11, a3 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vor.vv v8, v8, v9 ; RV32-NEXT: vsrl.vi v9, v8, 4 -; RV32-NEXT: vand.vv v8, v8, v12 -; RV32-NEXT: vand.vv v9, v9, v12 +; RV32-NEXT: vand.vv v8, v8, v10 +; RV32-NEXT: vand.vv v9, v9, v10 ; RV32-NEXT: vsll.vi v8, v8, 4 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v11 -; RV32-NEXT: vand.vv v9, v9, v11 +; RV32-NEXT: vand.vv v8, v8, v12 +; RV32-NEXT: vand.vv v9, v9, v12 ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: vsrl.vi v9, v8, 1 -; RV32-NEXT: vand.vv v8, v8, v10 -; RV32-NEXT: vand.vv v9, v9, v10 
+; RV32-NEXT: vand.vv v8, v8, v11 +; RV32-NEXT: vand.vv v9, v9, v11 ; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vor.vv v8, v9, v8 ; RV32-NEXT: addi sp, sp, 16 @@ -1250,25 +1250,25 @@ define <4 x i64> @vp_bitreverse_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: lui a5, 4080 ; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsrl.vi v14, v8, 24 +; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: vsrl.vi v14, v8, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsll.vx v12, v8, a2 ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vsrl.vx v10, v8, a2 -; RV32-NEXT: vsrl.vx v16, v8, a4 -; RV32-NEXT: vand.vx v18, v8, a1 -; RV32-NEXT: vand.vx v16, v16, a1 -; RV32-NEXT: vor.vv v10, v16, v10 +; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: vsrl.vx v18, v8, a4 +; RV32-NEXT: vand.vx v20, v10, a5 +; RV32-NEXT: vand.vx v10, v18, a1 +; RV32-NEXT: vor.vv v10, v10, v16 +; RV32-NEXT: vand.vx v16, v8, a1 +; RV32-NEXT: vsll.vx v16, v16, a4 +; RV32-NEXT: vor.vv v12, v12, v16 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vlse64.v v16, (a6), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsll.vx v18, v18, a4 -; RV32-NEXT: vor.vv v12, v12, v18 -; RV32-NEXT: vsrl.vi v18, v8, 8 -; RV32-NEXT: vand.vx v14, v14, a5 -; RV32-NEXT: vand.vv v18, v18, v16 -; RV32-NEXT: vor.vv v14, v18, v14 +; RV32-NEXT: vand.vv v14, v14, v16 +; RV32-NEXT: vor.vv v14, v14, v20 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: lui a3, 349525 @@ -1523,25 +1523,25 @@ define <8 x i64> @vp_bitreverse_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) ; RV32-NEXT: lui a5, 4080 ; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsrl.vi v20, v8, 24 +; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsrl.vi v20, v8, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsll.vx v16, v8, a2 ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vsrl.vx v12, v8, a2 -; RV32-NEXT: vsrl.vx v24, v8, a4 -; RV32-NEXT: vand.vx v28, v8, a1 -; RV32-NEXT: vand.vx v24, v24, a1 -; RV32-NEXT: vor.vv v12, v24, v12 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: vsrl.vx v28, v8, a4 +; RV32-NEXT: vand.vx v4, v12, a5 +; RV32-NEXT: vand.vx v12, v28, a1 +; RV32-NEXT: vor.vv v12, v12, v24 +; RV32-NEXT: vand.vx v24, v8, a1 +; RV32-NEXT: vsll.vx v24, v24, a4 +; RV32-NEXT: vor.vv v16, v16, v24 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-NEXT: vlse64.v v24, (a6), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsll.vx v28, v28, a4 -; RV32-NEXT: vor.vv v16, v16, v28 -; RV32-NEXT: vsrl.vi v28, v8, 8 -; RV32-NEXT: vand.vx v20, v20, a5 -; RV32-NEXT: vand.vv v28, v28, v24 -; RV32-NEXT: vor.vv v20, v28, v20 +; RV32-NEXT: vand.vv v20, v20, v24 +; RV32-NEXT: vor.vv v20, v20, v4 ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: lui a3, 349525 @@ -1676,35 +1676,36 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: addi a3, a4, 819 ; RV32-NEXT: sw a3, 32(sp) ; RV32-NEXT: sw a3, 36(sp) -; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: lui a3, 4080 ; RV32-NEXT: addi a4, a5, 1365 -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: addi a5, a6, -256 ; RV32-NEXT: sw a4, 24(sp) ; RV32-NEXT: sw a4, 28(sp) +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vsll.vx v16, v8, a1, v0.t +; RV32-NEXT: addi a5, a6, -256 ; RV32-NEXT: vand.vx v8, v8, a5, v0.t ; RV32-NEXT: vsll.vx v8, v8, a2, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a6, vlenb +; 
RV32-NEXT: slli a6, a6, 4 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 48 +; RV32-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v16, v24, a3, v0.t +; RV32-NEXT: vsll.vi v8, v16, 24, v0.t +; RV32-NEXT: addi a6, sp, 48 +; RV32-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: slli a4, a4, 3 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: lui a3, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vx v16, v24, a3, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v16, v24, v8, v0.t ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb @@ -1739,14 +1740,14 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: addi a2, sp, 32 +; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t @@ -1761,7 +1762,7 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: vand.vv v24, v24, v8, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vi v24, v24, 2, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t @@ -1869,75 +1870,76 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmv8r.v v16, v8 ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: lui a2, 61681 ; RV32-NEXT: lui a3, 209715 ; RV32-NEXT: lui a4, 349525 ; RV32-NEXT: li a5, 56 ; RV32-NEXT: lui a6, 16 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a5 -; RV32-NEXT: vsrl.vx v24, v8, a5 +; RV32-NEXT: vsll.vx v8, v8, a5 +; RV32-NEXT: vsrl.vx v24, v16, a5 ; RV32-NEXT: li a5, 40 +; RV32-NEXT: addi a6, a6, -256 +; RV32-NEXT: vsrl.vx v0, v16, a5 +; RV32-NEXT: vand.vx v0, v0, a6 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: addi a7, sp, 48 +; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded 
Spill +; RV32-NEXT: vand.vx v0, v16, a6 +; RV32-NEXT: lui a6, 4080 +; RV32-NEXT: vsll.vx v0, v0, a5 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vor.vv v8, v8, v0 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 3 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 48 +; RV32-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v16, 24 ; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: addi a1, a2, -241 +; RV32-NEXT: addi a2, a3, 819 +; RV32-NEXT: addi a3, a4, 1365 +; RV32-NEXT: vand.vx v0, v0, a6 ; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: lui a1, 4080 -; RV32-NEXT: addi a2, a3, 819 ; RV32-NEXT: sw a2, 32(sp) ; RV32-NEXT: sw a2, 36(sp) -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: addi a3, a4, 1365 -; RV32-NEXT: addi a4, a6, -256 -; RV32-NEXT: vsrl.vx v0, v8, a5 ; RV32-NEXT: sw a3, 24(sp) ; RV32-NEXT: sw a3, 28(sp) -; RV32-NEXT: vand.vx v0, v0, a4 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a4 -; RV32-NEXT: vsll.vx v0, v0, a5 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v16, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a2), zero +; RV32-NEXT: vlse64.v v8, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a1 -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v0, v8, v24 +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vx v16, v16, a6 +; RV32-NEXT: vsll.vi v16, v16, 24 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v0, v16, v8 ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v0 +; RV32-NEXT: addi a3, sp, 24 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v0 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a2), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -1947,7 +1949,7 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vand.vv v16, v16, 
v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 @@ -2072,35 +2074,36 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: addi a3, a4, 819 ; RV32-NEXT: sw a3, 32(sp) ; RV32-NEXT: sw a3, 36(sp) -; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: lui a3, 4080 ; RV32-NEXT: addi a4, a5, 1365 -; RV32-NEXT: vsll.vx v16, v8, a1, v0.t -; RV32-NEXT: addi a5, a6, -256 ; RV32-NEXT: sw a4, 24(sp) ; RV32-NEXT: sw a4, 28(sp) +; RV32-NEXT: addi a4, sp, 16 +; RV32-NEXT: vsll.vx v16, v8, a1, v0.t +; RV32-NEXT: addi a5, a6, -256 ; RV32-NEXT: vand.vx v8, v8, a5, v0.t ; RV32-NEXT: vsll.vx v8, v8, a2, v0.t ; RV32-NEXT: vor.vv v8, v16, v8, v0.t +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 4 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 48 +; RV32-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v16, v24, a3, v0.t +; RV32-NEXT: vsll.vi v8, v16, 24, v0.t +; RV32-NEXT: addi a6, sp, 48 +; RV32-NEXT: vs8r.v v8, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 4 +; RV32-NEXT: slli a4, a4, 3 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: lui a3, 4080 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vx v16, v24, a3, v0.t -; RV32-NEXT: vsll.vi v16, v16, 24, v0.t -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vand.vv v16, v24, v8, v0.t ; RV32-NEXT: vsll.vi v16, v16, 8, v0.t +; RV32-NEXT: addi a4, sp, 48 ; RV32-NEXT: vl8r.v v8, (a4) # Unknown-size Folded Reload ; RV32-NEXT: vor.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb @@ -2135,14 +2138,14 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: addi a2, sp, 32 +; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 48 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t @@ -2157,7 +2160,7 @@ define <16 x i64> @vp_bitreverse_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroex ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: vand.vv v24, v24, v8, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vi v24, v24, 2, v0.t ; RV32-NEXT: vor.vv v16, v16, v24, v0.t @@ -2265,75 +2268,76 @@ define <16 x i64> 
@vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev ; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmv8r.v v16, v8 ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: lui a2, 61681 ; RV32-NEXT: lui a3, 209715 ; RV32-NEXT: lui a4, 349525 ; RV32-NEXT: li a5, 56 ; RV32-NEXT: lui a6, 16 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a5 -; RV32-NEXT: vsrl.vx v24, v8, a5 +; RV32-NEXT: vsll.vx v8, v8, a5 +; RV32-NEXT: vsrl.vx v24, v16, a5 ; RV32-NEXT: li a5, 40 +; RV32-NEXT: addi a6, a6, -256 +; RV32-NEXT: vsrl.vx v0, v16, a5 +; RV32-NEXT: vand.vx v0, v0, a6 +; RV32-NEXT: vor.vv v24, v0, v24 +; RV32-NEXT: addi a7, sp, 48 +; RV32-NEXT: vs8r.v v24, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v0, v16, a6 +; RV32-NEXT: lui a6, 4080 +; RV32-NEXT: vsll.vx v0, v0, a5 +; RV32-NEXT: addi a5, sp, 16 +; RV32-NEXT: vor.vv v8, v8, v0 +; RV32-NEXT: csrr a7, vlenb +; RV32-NEXT: slli a7, a7, 3 +; RV32-NEXT: add a7, sp, a7 +; RV32-NEXT: addi a7, a7, 48 +; RV32-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v16, 24 ; RV32-NEXT: sw a1, 16(sp) ; RV32-NEXT: sw zero, 20(sp) ; RV32-NEXT: addi a1, a2, -241 +; RV32-NEXT: addi a2, a3, 819 +; RV32-NEXT: addi a3, a4, 1365 +; RV32-NEXT: vand.vx v0, v0, a6 ; RV32-NEXT: sw a1, 40(sp) ; RV32-NEXT: sw a1, 44(sp) -; RV32-NEXT: lui a1, 4080 -; RV32-NEXT: addi a2, a3, 819 ; RV32-NEXT: sw a2, 32(sp) ; RV32-NEXT: sw a2, 36(sp) -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: addi a3, a4, 1365 -; RV32-NEXT: addi a4, a6, -256 -; RV32-NEXT: vsrl.vx v0, v8, a5 ; RV32-NEXT: sw a3, 24(sp) ; RV32-NEXT: sw a3, 28(sp) -; RV32-NEXT: vand.vx v0, v0, a4 -; RV32-NEXT: vor.vv v24, v0, v24 -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a4 -; RV32-NEXT: vsll.vx v0, v0, a5 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v16, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a2), zero +; RV32-NEXT: vlse64.v v8, (a5), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a1 -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: vand.vx v8, v8, a1 -; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v0, v8, v24 +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vx v16, v16, a6 +; RV32-NEXT: vsll.vi v16, v16, 24 +; RV32-NEXT: vsll.vi v8, v8, 8 +; RV32-NEXT: vor.vv v0, v16, v8 ; RV32-NEXT: addi a1, sp, 48 ; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vor.vv v8, v24, v8 ; RV32-NEXT: addi a1, sp, 40 ; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v24, v16, v0 +; RV32-NEXT: addi a3, sp, 24 +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: slli a4, a4, 3 +; RV32-NEXT: 
add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 48 +; RV32-NEXT: vl8r.v v16, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v16, v0 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero -; RV32-NEXT: addi a1, sp, 24 +; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vor.vv v8, v24, v8 -; RV32-NEXT: vsrl.vi v24, v8, 4 -; RV32-NEXT: vand.vv v8, v8, v16 -; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vor.vv v8, v16, v8 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a2), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2343,7 +2347,7 @@ define <16 x i64> @vp_bitreverse_v16i64_unmasked(<16 x i64> %va, i32 zeroext %ev ; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vor.vv v8, v16, v8 @@ -2455,9 +2459,9 @@ define <128 x i16> @vp_bitreverse_v128i16(<128 x i16> %va, <128 x i1> %m, i32 ze ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 8 +; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB34_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll index d765e4c0b8f6a..37caf61aac19c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bswap-vp.ll @@ -370,31 +370,31 @@ define <2 x i64> @vp_bswap_v2i64_unmasked(<2 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; RV32-NEXT: vsrl.vi v9, v8, 24 +; RV32-NEXT: vsrl.vi v10, v8, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsll.vx v10, v8, a2 +; RV32-NEXT: vsll.vx v11, v8, a2 ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vsrl.vx v11, v8, a2 -; RV32-NEXT: vsrl.vx v12, v8, a4 +; RV32-NEXT: vsrl.vx v12, v8, a2 +; RV32-NEXT: vsrl.vx v13, v8, a4 +; RV32-NEXT: vand.vx v9, v9, a5 +; RV32-NEXT: vand.vx v13, v13, a1 +; RV32-NEXT: vor.vv v12, v13, v12 ; RV32-NEXT: vand.vx v13, v8, a1 -; RV32-NEXT: vand.vx v12, v12, a1 -; RV32-NEXT: vor.vv v11, v12, v11 +; RV32-NEXT: vsll.vx v13, v13, a4 +; RV32-NEXT: vor.vv v11, v11, v13 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vlse64.v v12, (a6), zero +; RV32-NEXT: vlse64.v v13, (a6), zero ; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; RV32-NEXT: vsll.vx v13, v13, a4 -; RV32-NEXT: vor.vv v10, v10, v13 -; RV32-NEXT: vsrl.vi v13, v8, 8 -; RV32-NEXT: vand.vx v9, v9, a5 -; RV32-NEXT: vand.vv v13, v13, v12 -; RV32-NEXT: vor.vv v9, v13, v9 -; RV32-NEXT: vand.vv v12, v8, v12 +; RV32-NEXT: vand.vv v10, v10, v13 +; RV32-NEXT: vor.vv v9, v10, v9 +; RV32-NEXT: vand.vv v10, v8, v13 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v12, v12, 8 -; RV32-NEXT: vor.vv v8, v8, v12 -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: vor.vv v9, v9, v11 +; RV32-NEXT: vsll.vi v10, v10, 8 +; RV32-NEXT: vor.vv v8, v8, v10 +; RV32-NEXT: vor.vv v8, v11, v8 +; RV32-NEXT: vor.vv v9, v9, v12 ; RV32-NEXT: vor.vv v8, 
v8, v9 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -530,31 +530,31 @@ define <4 x i64> @vp_bswap_v4i64_unmasked(<4 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; RV32-NEXT: vsrl.vi v10, v8, 24 +; RV32-NEXT: vsrl.vi v12, v8, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsll.vx v12, v8, a2 +; RV32-NEXT: vsll.vx v14, v8, a2 ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vsrl.vx v14, v8, a2 -; RV32-NEXT: vsrl.vx v16, v8, a4 +; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: vsrl.vx v18, v8, a4 +; RV32-NEXT: vand.vx v10, v10, a5 +; RV32-NEXT: vand.vx v18, v18, a1 +; RV32-NEXT: vor.vv v16, v18, v16 ; RV32-NEXT: vand.vx v18, v8, a1 -; RV32-NEXT: vand.vx v16, v16, a1 -; RV32-NEXT: vor.vv v14, v16, v14 +; RV32-NEXT: vsll.vx v18, v18, a4 +; RV32-NEXT: vor.vv v14, v14, v18 ; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vlse64.v v16, (a6), zero +; RV32-NEXT: vlse64.v v18, (a6), zero ; RV32-NEXT: vsetvli zero, a0, e64, m2, ta, ma -; RV32-NEXT: vsll.vx v18, v18, a4 -; RV32-NEXT: vor.vv v12, v12, v18 -; RV32-NEXT: vsrl.vi v18, v8, 8 -; RV32-NEXT: vand.vx v10, v10, a5 -; RV32-NEXT: vand.vv v18, v18, v16 -; RV32-NEXT: vor.vv v10, v18, v10 -; RV32-NEXT: vand.vv v16, v8, v16 +; RV32-NEXT: vand.vv v12, v12, v18 +; RV32-NEXT: vor.vv v10, v12, v10 +; RV32-NEXT: vand.vv v12, v8, v18 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v16, v16, 8 -; RV32-NEXT: vor.vv v8, v8, v16 -; RV32-NEXT: vor.vv v8, v12, v8 -; RV32-NEXT: vor.vv v10, v10, v14 +; RV32-NEXT: vsll.vi v12, v12, 8 +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vor.vv v8, v14, v8 +; RV32-NEXT: vor.vv v10, v10, v16 ; RV32-NEXT: vor.vv v8, v8, v10 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -690,31 +690,31 @@ define <8 x i64> @vp_bswap_v8i64_unmasked(<8 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; RV32-NEXT: vsrl.vi v12, v8, 24 +; RV32-NEXT: vsrl.vi v16, v8, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsll.vx v16, v8, a2 +; RV32-NEXT: vsll.vx v20, v8, a2 ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vsrl.vx v20, v8, a2 -; RV32-NEXT: vsrl.vx v24, v8, a4 +; RV32-NEXT: vsrl.vx v24, v8, a2 +; RV32-NEXT: vsrl.vx v28, v8, a4 +; RV32-NEXT: vand.vx v12, v12, a5 +; RV32-NEXT: vand.vx v28, v28, a1 +; RV32-NEXT: vor.vv v24, v28, v24 ; RV32-NEXT: vand.vx v28, v8, a1 -; RV32-NEXT: vand.vx v24, v24, a1 -; RV32-NEXT: vor.vv v20, v24, v20 +; RV32-NEXT: vsll.vx v28, v28, a4 +; RV32-NEXT: vor.vv v20, v20, v28 ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vlse64.v v24, (a6), zero +; RV32-NEXT: vlse64.v v28, (a6), zero ; RV32-NEXT: vsetvli zero, a0, e64, m4, ta, ma -; RV32-NEXT: vsll.vx v28, v28, a4 -; RV32-NEXT: vor.vv v16, v16, v28 -; RV32-NEXT: vsrl.vi v28, v8, 8 -; RV32-NEXT: vand.vx v12, v12, a5 -; RV32-NEXT: vand.vv v28, v28, v24 -; RV32-NEXT: vor.vv v12, v28, v12 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vand.vv v16, v16, v28 +; RV32-NEXT: vor.vv v12, v16, v12 +; RV32-NEXT: vand.vv v16, v8, v28 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 -; RV32-NEXT: vor.vv v8, v16, v8 -; RV32-NEXT: vor.vv v12, v12, v20 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v20, v8 +; RV32-NEXT: vor.vv v12, v12, v24 ; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: addi sp, sp, 
16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -768,61 +768,63 @@ define <15 x i64> @vp_bswap_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %ev ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmv8r.v v16, v8 ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: li a2, 56 ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: li a4, 40 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: lui a5, 4080 +; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a2, v0.t +; RV32-NEXT: vsll.vx v8, v8, a2, v0.t ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vand.vx v24, v8, a1, v0.t +; RV32-NEXT: vand.vx v24, v16, a1, v0.t ; RV32-NEXT: vsll.vx v24, v24, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v8, v16, a5, v0.t +; RV32-NEXT: vsll.vi v8, v8, 24, v0.t +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v8, (a6), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vx v24, v8, a3, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vand.vv v24, v16, v8, v0.t +; RV32-NEXT: vsll.vi v8, v24, 8, v0.t ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v24, 8, v0.t ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v8, v16, a2, v0.t +; RV32-NEXT: vsrl.vx v24, v16, a4, v0.t ; RV32-NEXT: vand.vx v24, v24, a1, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a3, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v16, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a5, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 @@ -916,48 
+918,48 @@ define <15 x i64> @vp_bswap_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: li a4, 40 ; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: addi a6, sp, 8 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v24, v8, a2 -; RV32-NEXT: addi a1, a3, -256 ; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: addi a2, sp, 8 +; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vsrl.vx v0, v8, a4 -; RV32-NEXT: vand.vx v0, v0, a1 +; RV32-NEXT: vand.vx v0, v0, a3 ; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a1 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a4 ; RV32-NEXT: vor.vv v16, v24, v0 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: vand.vx v0, v0, a5 +; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a6), zero +; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a5 -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: vand.vv v24, v8, v0 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 @@ -1031,61 +1033,63 @@ define <16 x i64> @vp_bswap_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %ev ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vmv8r.v v16, v8 ; RV32-NEXT: lui a1, 1044480 ; RV32-NEXT: li a2, 56 ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: li a4, 40 -; RV32-NEXT: addi a5, sp, 8 +; RV32-NEXT: lui a5, 4080 +; RV32-NEXT: addi a6, sp, 8 ; RV32-NEXT: sw a1, 8(sp) ; RV32-NEXT: sw zero, 12(sp) -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsll.vx v16, v8, a2, v0.t +; RV32-NEXT: vsll.vx v8, v8, a2, v0.t ; RV32-NEXT: addi a1, a3, -256 -; RV32-NEXT: vand.vx v24, v8, a1, v0.t +; RV32-NEXT: vand.vx v24, v16, a1, v0.t ; 
RV32-NEXT: vsll.vx v24, v24, a4, v0.t -; RV32-NEXT: vor.vv v16, v16, v24, v0.t +; RV32-NEXT: vor.vv v8, v8, v24, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v8, v16, a5, v0.t +; RV32-NEXT: vsll.vi v8, v8, 24, v0.t +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a5), zero +; RV32-NEXT: vlse64.v v8, (a6), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 16 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: lui a3, 4080 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vx v24, v8, a3, v0.t -; RV32-NEXT: vsll.vi v24, v24, 24, v0.t +; RV32-NEXT: vand.vv v24, v16, v8, v0.t +; RV32-NEXT: vsll.vi v8, v24, 8, v0.t ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vand.vv v24, v8, v16, v0.t -; RV32-NEXT: vsll.vi v16, v24, 8, v0.t ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 ; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t -; RV32-NEXT: vsrl.vx v24, v8, a4, v0.t +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vx v8, v16, a2, v0.t +; RV32-NEXT: vsrl.vx v24, v16, a4, v0.t ; RV32-NEXT: vand.vx v24, v24, a1, v0.t -; RV32-NEXT: vor.vv v16, v24, v16, v0.t +; RV32-NEXT: vor.vv v8, v24, v8, v0.t ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t -; RV32-NEXT: vand.vx v24, v24, a3, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v16, 24, v0.t +; RV32-NEXT: vand.vx v24, v24, a5, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 8, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 @@ -1179,48 +1183,48 @@ define <16 x i64> @vp_bswap_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: lui a3, 16 ; RV32-NEXT: li a4, 40 ; RV32-NEXT: lui a5, 4080 -; RV32-NEXT: addi a6, sp, 8 -; RV32-NEXT: sw a1, 8(sp) -; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsll.vx v24, v8, a2 -; RV32-NEXT: addi a1, a3, -256 ; RV32-NEXT: vsrl.vx v16, v8, a2 +; RV32-NEXT: addi a2, sp, 8 +; RV32-NEXT: addi a3, a3, -256 ; RV32-NEXT: vsrl.vx v0, v8, a4 -; RV32-NEXT: vand.vx v0, v0, a1 +; RV32-NEXT: vand.vx v0, v0, a3 ; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 16 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vand.vx v0, v8, a1 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: slli a6, a6, 3 +; RV32-NEXT: add a6, sp, a6 +; 
RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs8r.v v16, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vand.vx v0, v8, a3 ; RV32-NEXT: vsll.vx v0, v0, a4 ; RV32-NEXT: vor.vv v16, v24, v0 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v8, 24 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw zero, 12(sp) +; RV32-NEXT: vand.vx v0, v0, a5 +; RV32-NEXT: vsrl.vi v24, v8, 8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a6), zero +; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 24 -; RV32-NEXT: vand.vx v16, v16, a5 -; RV32-NEXT: vsrl.vi v24, v8, 8 -; RV32-NEXT: vand.vv v24, v24, v0 -; RV32-NEXT: vor.vv v16, v24, v16 -; RV32-NEXT: vand.vv v24, v8, v0 +; RV32-NEXT: vand.vv v24, v24, v16 +; RV32-NEXT: vor.vv v24, v24, v0 +; RV32-NEXT: vand.vv v16, v8, v16 ; RV32-NEXT: vand.vx v8, v8, a5 ; RV32-NEXT: vsll.vi v8, v8, 24 -; RV32-NEXT: vsll.vi v24, v24, 8 -; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vsll.vi v16, v16, 8 +; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: addi a0, sp, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v8, v24, v8 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v8, v16, v8 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 16 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vor.vv v16, v16, v24 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vor.vv v16, v24, v16 ; RV32-NEXT: vor.vv v8, v8, v16 ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: slli a0, a0, 4 @@ -1298,9 +1302,9 @@ define <128 x i16> @vp_bswap_v128i16(<128 x i16> %va, <128 x i1> %m, i32 zeroext ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 8 +; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll index dbbb8362144ca..781c61b571994 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-buildvec-of-binop.ll @@ -9,10 +9,10 @@ define <4 x i32> @add_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_0) -; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %e0 = add i32 %a, 23 @@ -37,10 +37,10 @@ define <8 x i32> @add_constant_rhs_8xi32(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, a4 -; CHECK-NEXT: vle32.v v10, (a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a5 ; CHECK-NEXT: vslide1down.vx v8, v8, a6 ; CHECK-NEXT: vslide1down.vx v8, v8, a7 +; CHECK-NEXT: vle32.v v10, (a0) ; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: ret %e0 = add i32 %a, 23 @@ -70,10 +70,10 @@ define <4 x i32> @sub_constant_rhs(i32 
%a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI2_0) -; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vsub.vv v8, v8, v9 ; CHECK-NEXT: ret %e0 = sub i32 %a, 23 @@ -94,10 +94,10 @@ define <4 x i32> @mul_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI3_0) -; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vmul.vv v8, v8, v9 ; CHECK-NEXT: ret %e0 = mul i32 %a, 23 @@ -125,15 +125,15 @@ define <4 x i32> @udiv_constant_rhs(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: lui a0, %hi(.LCPI4_1) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_1) ; CHECK-NEXT: vslide1down.vx v9, v9, a1 -; CHECK-NEXT: vle32.v v11, (a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vmulhu.vv v10, v8, v10 -; CHECK-NEXT: vsub.vv v12, v8, v10 -; CHECK-NEXT: vmulhu.vv v9, v12, v9 +; CHECK-NEXT: vsub.vv v11, v8, v10 +; CHECK-NEXT: vmulhu.vv v9, v11, v9 +; CHECK-NEXT: vle32.v v11, (a0) ; CHECK-NEXT: vadd.vv v9, v9, v10 -; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vsrl.vv v9, v9, v11 +; CHECK-NEXT: vmv.v.i v0, 4 ; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 ; CHECK-NEXT: ret %e0 = udiv i32 %a, 23 @@ -155,10 +155,10 @@ define <4 x float> @fadd_constant_rhs(float %a, float %b, float %c, float %d) { ; CHECK-NEXT: vfmv.v.f v8, fa0 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0) -; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 +; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vfadd.vv v8, v8, v9 ; CHECK-NEXT: ret %e0 = fadd float %a, 23.0 @@ -179,10 +179,10 @@ define <4 x float> @fdiv_constant_rhs(float %a, float %b, float %c, float %d) { ; CHECK-NEXT: vfmv.v.f v8, fa0 ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) -; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 +; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vfdiv.vv v8, v8, v9 ; CHECK-NEXT: ret %e0 = fdiv float %a, 23.0 @@ -317,10 +317,10 @@ define <4 x i32> @add_constant_rhs_inverse(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI11_0) -; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %e0 = sub i32 %a, 1 @@ -341,10 +341,10 @@ define <4 x i32> @add_constant_rhs_commute(i32 %a, i32 %b, i32 %c, i32 %d) { ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI12_0) -; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 +; CHECK-NEXT: vle32.v v9, (a0) ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: ret %e0 = add i32 %a, 23 @@ -562,21 +562,20 @@ define <8 x i32> @add_constant_rhs_8xi32_partial(<8 x i32> %vin, 
i32 %a, i32 %b, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vmv.s.x v12, a1 ; CHECK-NEXT: vslideup.vi v8, v10, 4 -; CHECK-NEXT: vmv.s.x v10, a2 -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) +; CHECK-NEXT: vmv.s.x v10, a1 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, tu, ma -; CHECK-NEXT: vslideup.vi v8, v12, 5 -; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v12, (a0) +; CHECK-NEXT: vslideup.vi v8, v10, 5 +; CHECK-NEXT: vmv.s.x v10, a2 ; CHECK-NEXT: vsetivli zero, 7, e32, m2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 6 ; CHECK-NEXT: vmv.s.x v10, a3 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI19_0) ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v10, 7 -; CHECK-NEXT: vadd.vv v8, v8, v12 +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vadd.vv v8, v8, v10 ; CHECK-NEXT: ret %vadd = add <8 x i32> %vin, %e0 = add i32 %a, 23 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll index 60a9948198c8f..78a6acfac4581 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv-fastcc.ll @@ -87,14 +87,14 @@ define fastcc <128 x i32> @ret_split_v128i32(ptr %x) { ; CHECK-NEXT: addi a2, a1, 256 ; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: addi a2, a1, 384 -; CHECK-NEXT: vle32.v v24, (a1) -; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle32.v v0, (a2) -; CHECK-NEXT: addi a2, a0, 256 -; CHECK-NEXT: vse32.v v24, (a0) +; CHECK-NEXT: vle32.v v24, (a2) +; CHECK-NEXT: addi a2, a0, 384 +; CHECK-NEXT: vle32.v v0, (a1) +; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: vse32.v v0, (a0) ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vse32.v v0, (a1) -; CHECK-NEXT: vse32.v v16, (a2) +; CHECK-NEXT: vse32.v v24, (a2) +; CHECK-NEXT: vse32.v v16, (a1) ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %v = load <128 x i32>, ptr %x @@ -207,14 +207,15 @@ define fastcc <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x ; CHECK-NEXT: addi s0, sp, 256 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: andi sp, sp, -128 +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: mv a3, sp +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: mv a0, sp +; CHECK-NEXT: vse32.v v24, (a0) ; CHECK-NEXT: mv a0, sp ; CHECK-NEXT: li a2, 42 -; CHECK-NEXT: vse32.v v8, (a3) -; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: call ext3 ; CHECK-NEXT: addi sp, s0, -256 ; CHECK-NEXT: .cfi_def_cfa sp, 256 @@ -269,8 +270,8 @@ define fastcc <32 x i32> @pass_vector_arg_indirect_stack(<32 x i32> %x, <32 x i3 ; CHECK-NEXT: mv t3, sp ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: li t4, 8 ; CHECK-NEXT: vse32.v v8, (t0) +; CHECK-NEXT: li t4, 8 ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: call vector_arg_indirect_stack @@ -307,17 +308,15 @@ define fastcc <32 x i32> @vector_arg_direct_stack(i32 %0, i32 %1, i32 %2, i32 %3 define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32> %y, <32 x i32> %z) { ; CHECK-LABEL: pass_vector_arg_direct_stack: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -176 -; CHECK-NEXT: .cfi_def_cfa_offset 176 -; CHECK-NEXT: sd ra, 168(sp) # 8-byte Folded Spill -; 
CHECK-NEXT: sd s0, 160(sp) # 8-byte Folded Spill +; CHECK-NEXT: addi sp, sp, -160 +; CHECK-NEXT: .cfi_def_cfa_offset 160 +; CHECK-NEXT: sd ra, 152(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 -; CHECK-NEXT: .cfi_offset s0, -16 ; CHECK-NEXT: li a0, 32 ; CHECK-NEXT: addi t0, sp, 16 ; CHECK-NEXT: li t1, 1 ; CHECK-NEXT: li t2, 13 -; CHECK-NEXT: li s0, 12 +; CHECK-NEXT: li t5, 12 ; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: li a2, 2 ; CHECK-NEXT: li a3, 3 @@ -326,23 +325,21 @@ define fastcc <32 x i32> @pass_vector_arg_direct_stack(<32 x i32> %x, <32 x i32> ; CHECK-NEXT: li a6, 6 ; CHECK-NEXT: li a7, 7 ; CHECK-NEXT: li t3, 8 +; CHECK-NEXT: sd t1, 144(sp) +; CHECK-NEXT: li t4, 9 +; CHECK-NEXT: sd t5, 0(sp) +; CHECK-NEXT: sd t2, 8(sp) +; CHECK-NEXT: li t5, 10 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vse32.v v8, (t0) -; CHECK-NEXT: li t4, 9 -; CHECK-NEXT: li t5, 10 -; CHECK-NEXT: sd t1, 144(sp) ; CHECK-NEXT: li t6, 11 -; CHECK-NEXT: sd s0, 0(sp) -; CHECK-NEXT: sd t2, 8(sp) ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: call vector_arg_direct_stack -; CHECK-NEXT: ld ra, 168(sp) # 8-byte Folded Reload -; CHECK-NEXT: ld s0, 160(sp) # 8-byte Folded Reload +; CHECK-NEXT: ld ra, 152(sp) # 8-byte Folded Reload ; CHECK-NEXT: .cfi_restore ra -; CHECK-NEXT: .cfi_restore s0 -; CHECK-NEXT: addi sp, sp, 176 +; CHECK-NEXT: addi sp, sp, 160 ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %s = call fastcc <32 x i32> @vector_arg_direct_stack(i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, <32 x i32> zeroinitializer, i32 1) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll index f42b4a3a26aad..34600d9a0eaf4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-calling-conv.ll @@ -87,14 +87,14 @@ define <128 x i32> @ret_split_v128i32(ptr %x) { ; CHECK-NEXT: addi a2, a1, 256 ; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: addi a2, a1, 384 -; CHECK-NEXT: vle32.v v24, (a1) -; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle32.v v0, (a2) -; CHECK-NEXT: addi a2, a0, 256 -; CHECK-NEXT: vse32.v v24, (a0) +; CHECK-NEXT: vle32.v v24, (a2) +; CHECK-NEXT: addi a2, a0, 384 +; CHECK-NEXT: vle32.v v0, (a1) +; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: vse32.v v0, (a0) ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vse32.v v0, (a1) -; CHECK-NEXT: vse32.v v16, (a2) +; CHECK-NEXT: vse32.v v24, (a2) +; CHECK-NEXT: vse32.v v16, (a1) ; CHECK-NEXT: vse32.v v8, (a0) ; CHECK-NEXT: ret %v = load <128 x i32>, ptr %x @@ -207,14 +207,15 @@ define <32 x i32> @ret_v32i32_call_v32i32_v32i32_v32i32_i32(<32 x i32> %x, <32 x ; CHECK-NEXT: addi s0, sp, 256 ; CHECK-NEXT: .cfi_def_cfa s0, 0 ; CHECK-NEXT: andi sp, sp, -128 +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vmv8r.v v24, v8 ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: mv a3, sp +; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: mv a0, sp +; CHECK-NEXT: vse32.v v24, (a0) ; CHECK-NEXT: mv a0, sp ; CHECK-NEXT: li a2, 42 -; CHECK-NEXT: vse32.v v8, (a3) -; CHECK-NEXT: vmv.v.v v8, v24 ; CHECK-NEXT: call ext3 ; CHECK-NEXT: addi sp, s0, -256 ; CHECK-NEXT: .cfi_def_cfa sp, 256 @@ -267,9 +268,9 @@ define <32 x i32> @call_split_vector_args(ptr %pa, ptr %pb) { ; CHECK-NEXT: li a0, 32 ; 
CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a1) -; CHECK-NEXT: mv a1, sp ; CHECK-NEXT: mv a0, sp -; CHECK-NEXT: vse32.v v16, (a1) +; CHECK-NEXT: vse32.v v16, (a0) +; CHECK-NEXT: mv a0, sp ; CHECK-NEXT: vmv1r.v v9, v8 ; CHECK-NEXT: vmv1r.v v10, v8 ; CHECK-NEXT: vmv1r.v v11, v8 @@ -313,7 +314,7 @@ define <32 x i32> @pass_vector_arg_via_stack(<32 x i32> %x, <32 x i32> %y, <32 x ; CHECK-NEXT: sd ra, 136(sp) # 8-byte Folded Spill ; CHECK-NEXT: .cfi_offset ra, -8 ; CHECK-NEXT: li a0, 32 -; CHECK-NEXT: li t0, 8 +; CHECK-NEXT: li a7, 8 ; CHECK-NEXT: li a1, 1 ; CHECK-NEXT: li a2, 2 ; CHECK-NEXT: li a3, 3 @@ -322,9 +323,9 @@ define <32 x i32> @pass_vector_arg_via_stack(<32 x i32> %x, <32 x i32> %y, <32 x ; CHECK-NEXT: li a6, 6 ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: sd a7, 128(sp) ; CHECK-NEXT: vse32.v v8, (sp) ; CHECK-NEXT: li a7, 7 -; CHECK-NEXT: sd t0, 128(sp) ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: call vector_arg_via_stack @@ -378,8 +379,8 @@ define <4 x i1> @pass_vector_mask_arg_via_stack(<4 x i1> %v) { ; CHECK-NEXT: vmv.v.v v17, v16 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmsne.vi v16, v17, 0 -; CHECK-NEXT: li a7, 7 ; CHECK-NEXT: vsm.v v16, (a2) +; CHECK-NEXT: li a7, 7 ; CHECK-NEXT: li a0, 0 ; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: li a2, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll index a9b255bb62aeb..3c79f42177721 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll @@ -13,13 +13,13 @@ declare <2 x half> @llvm.vp.ceil.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI0_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -35,12 +35,12 @@ define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -59,12 +59,12 @@ define <2 x half> @vp_ceil_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) define <2 x half> @vp_ceil_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI1_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: 
vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -77,11 +77,11 @@ define <2 x half> @vp_ceil_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -99,13 +99,13 @@ declare <4 x half> @llvm.vp.ceil.v4f16(<4 x half>, <4 x i1>, i32) define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI2_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -121,12 +121,12 @@ define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -145,12 +145,12 @@ define <4 x half> @vp_ceil_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) define <4 x half> @vp_ceil_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI3_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,11 +163,11 @@ define <4 x half> @vp_ceil_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -185,13 +185,13 @@ declare <8 x half> @llvm.vp.ceil.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; 
ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -207,12 +207,12 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -231,12 +231,12 @@ define <8 x half> @vp_ceil_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) define <8 x half> @vp_ceil_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -249,11 +249,11 @@ define <8 x half> @vp_ceil_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -273,12 +273,12 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) -; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 3 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -295,12 +295,12 @@ define <16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -319,12 +319,12 @@ define 
<16 x half> @vp_ceil_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext %e define <16 x half> @vp_ceil_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_ceil_v16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: fsrmi a0, 3 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -337,11 +337,11 @@ define <16 x half> @vp_ceil_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -363,9 +363,9 @@ define <2 x float> @vp_ceil_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -384,8 +384,8 @@ define <2 x float> @vp_ceil_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -405,9 +405,9 @@ define <4 x float> @vp_ceil_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -426,8 +426,8 @@ define <4 x float> @vp_ceil_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -448,9 +448,9 @@ define <8 x float> @vp_ceil_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %evl ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -470,8 +470,8 @@ define <8 x float> @vp_ceil_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: 
vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -492,9 +492,9 @@ define <16 x float> @vp_ceil_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -514,8 +514,8 @@ define <16 x float> @vp_ceil_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl) ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -531,13 +531,13 @@ declare <2 x double> @llvm.vp.ceil.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_ceil_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI16_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -552,12 +552,12 @@ define <2 x double> @vp_ceil_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %e define <2 x double> @vp_ceil_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -575,12 +575,12 @@ define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %e ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -596,12 +596,12 @@ define <4 x double> @vp_ceil_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext %e define <4 x double> @vp_ceil_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; 
CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -619,12 +619,12 @@ define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %e ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -640,12 +640,12 @@ define <8 x double> @vp_ceil_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext %e define <8 x double> @vp_ceil_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI21_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -663,12 +663,12 @@ define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroex ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -684,12 +684,12 @@ define <15 x double> @vp_ceil_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroex define <15 x double> @vp_ceil_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v15f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI23_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -707,12 +707,12 @@ define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroex ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -728,12 +728,12 @@ define <16 x double> @vp_ceil_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroex define <16 x 
double> @vp_ceil_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_ceil_v16f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI25_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -762,8 +762,8 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -778,33 +778,33 @@ define <32 x double> @vp_ceil_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll index 9d0d42cf754c5..99007aaa8a106 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctlz-vp.ll @@ -1503,38 +1503,29 @@ declare <15 x i64> @llvm.vp.ctlz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32) define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 
32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1547,57 +1538,34 @@ define <15 x i64> @vp_ctlz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vor.vv v16, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vnot.v v16, v8, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vand.vv v24, v8, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vsub.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: vand.vv v24, v24, v8, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: 
add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 48 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; @@ -1671,47 +1639,49 @@ define <15 x i64> @vp_ctlz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vx v0, v8, a1 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 16 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vnot.v v0, v8 -; RV32-NEXT: vsrl.vi v8, v0, 1 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v24, v0, v24 -; RV32-NEXT: vand.vv v0, v24, v16 -; RV32-NEXT: vsrl.vi v24, v24, 2 -; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v0, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v0 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 @@ -1775,38 +1745,29 @@ declare <16 x i64> @llvm.vp.ctlz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32) define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; 
RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -1819,57 +1780,34 @@ define <16 x i64> @vp_ctlz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vor.vv v16, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vnot.v v16, v8, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vand.vv v24, v8, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vsub.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: vand.vv v24, v24, v8, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero 
+; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 48 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; @@ -1943,47 +1881,49 @@ define <16 x i64> @vp_ctlz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vx v0, v8, a1 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 16 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vnot.v v0, v8 -; RV32-NEXT: vsrl.vi v8, v0, 1 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v24, v0, v24 -; RV32-NEXT: vand.vv v0, v24, v16 -; RV32-NEXT: vsrl.vi v24, v24, 2 -; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v0, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v0 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 @@ -2055,7 +1995,8 
@@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -2072,12 +2013,12 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sw a2, 36(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: li a1, 16 -; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a2, 16(sp) ; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: li a1, 16 ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: bltu a0, a1, .LBB34_2 ; RV32-NEXT: # %bb.1: @@ -2087,7 +2028,6 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: li a1, 32 ; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: addi a4, sp, 32 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -2102,34 +2042,25 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a5, 40 -; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a4), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 @@ -2137,38 +2068,41 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 +; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v16, v16, v8, v0.t +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded 
Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill @@ -2180,61 +2114,37 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a3, a3, a5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v8, (a4), zero +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; 
RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -2244,7 +2154,8 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload @@ -2266,18 +2177,20 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -2290,41 +2203,25 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -2332,21 +2229,20 @@ define <32 x i64> @vp_ctlz_v32i64(<32 
x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -2372,9 +2268,9 @@ define <32 x i64> @vp_ctlz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: li a2, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB34_2 ; RV64-NEXT: # %bb.1: @@ -2495,14 +2391,14 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: sw a2, 36(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: li a3, 16 -; RV32-NEXT: addi a1, a2, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: li a2, 16 ; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a3, .LBB35_2 +; RV32-NEXT: bltu a0, a2, .LBB35_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB35_2: @@ -2550,76 +2446,58 @@ define <32 x i64> @vp_ctlz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsrl.vi v0, v16, 8 ; RV32-NEXT: vor.vv v16, v16, v0 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vnot.v v0, v8 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v16, 16 -; RV32-NEXT: vor.vv v16, v16, v8 +; RV32-NEXT: vsrl.vi v0, v16, 16 +; RV32-NEXT: vor.vv v16, v16, v0 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v0, 1 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v0, v0, v24 +; RV32-NEXT: vsub.vv v0, v8, v0 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vor.vv v24, v16, v8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v24, v0, v24 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v0, v16, a2 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v24, v8 -; RV32-NEXT: vsrl.vi v24, v24, 2 -; RV32-NEXT: vand.vv v24, v24, v8 -; RV32-NEXT: vadd.vv v24, v0, v24 +; RV32-NEXT: vand.vv v16, v0, v8 +; RV32-NEXT: vsrl.vi v0, v0, 2 +; RV32-NEXT: vand.vv v0, v0, v8 +; RV32-NEXT: vadd.vv v16, v16, v0 ; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vs8r.v v24, (a2) # 
Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v16 -; RV32-NEXT: vsrl.vi v0, v16, 1 +; RV32-NEXT: vnot.v v24, v24 +; RV32-NEXT: vsrl.vi v0, v24, 1 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: addi a2, sp, 24 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsub.vv v0, v16, v0 -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vsub.vv v24, v24, v0 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v24, 4 -; RV32-NEXT: vadd.vv v16, v24, v16 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v16, 4 +; RV32-NEXT: vadd.vv v16, v16, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v0, v8 -; RV32-NEXT: vsrl.vi v0, v0, 2 -; RV32-NEXT: vand.vv v8, v0, v8 +; RV32-NEXT: vand.vv v0, v24, v8 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v8, v24, v8 +; RV32-NEXT: addi a2, sp, 24 +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vadd.vv v8, v0, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a2), zero +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a3), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v16, v16, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -4213,38 +4091,29 @@ define <8 x i64> @vp_ctlz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext % define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw 
a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -4257,57 +4126,34 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vor.vv v16, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vnot.v v16, v8, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vand.vv v24, v8, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vsub.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: vand.vv v24, v24, v8, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, 
sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 48 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; @@ -4381,47 +4227,49 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vx v0, v8, a1 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 16 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vnot.v v0, v8 -; RV32-NEXT: vsrl.vi v8, v0, 1 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v24, v0, v24 -; RV32-NEXT: vand.vv v0, v24, v16 -; RV32-NEXT: vsrl.vi v24, v24, 2 -; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v0, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v0 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 @@ -4483,38 +4331,29 @@ define <15 x i64> @vp_ctlz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctlz_zero_undef_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: 
.cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -4527,57 +4366,34 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 16, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t -; RV32-NEXT: addi a1, sp, 32 -; RV32-NEXT: vor.vv v16, v8, v16, v0.t +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vor.vv v8, v8, v16, v0.t +; RV32-NEXT: vnot.v v16, v8, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vand.vv v24, v8, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v16, v0.t -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vsub.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: vand.vv v24, v24, v8, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, 
a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 48 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; @@ -4651,47 +4467,49 @@ define <16 x i64> @vp_ctlz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vsrl.vi v16, v8, 1 ; RV32-NEXT: addi a1, a1, 257 ; RV32-NEXT: sw a1, 0(sp) ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: li a1, 32 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 2 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 8 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vi v0, v8, 16 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vsrl.vx v0, v8, a1 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 2 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 4 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 8 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 16 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vsrl.vx v16, v8, a1 +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vnot.v v8, v8 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: addi a1, sp, 8 -; RV32-NEXT: vor.vv v8, v8, v0 -; RV32-NEXT: vnot.v v0, v8 -; RV32-NEXT: vsrl.vi v8, v0, 1 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v24, v0, v24 -; RV32-NEXT: vand.vv v0, v24, v16 -; RV32-NEXT: vsrl.vi v24, v24, 2 -; RV32-NEXT: vand.vv v16, v24, v16 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v0, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v0 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 @@ -4761,7 +4579,8 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sub sp, sp, a1 ; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: 
csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: li a2, 24 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill @@ -4778,12 +4597,12 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sw a2, 36(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: li a1, 16 -; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a2, 16(sp) ; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: li a1, 16 ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: bltu a0, a1, .LBB70_2 ; RV32-NEXT: # %bb.1: @@ -4793,7 +4612,6 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: li a1, 32 ; RV32-NEXT: addi a3, sp, 40 -; RV32-NEXT: addi a4, sp, 32 ; RV32-NEXT: vor.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t ; RV32-NEXT: vor.vv v8, v8, v16, v0.t @@ -4808,34 +4626,25 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a5, 40 -; RV32-NEXT: mul a3, a3, a5 +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v8, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a4), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 +; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 @@ -4843,38 +4652,41 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 +; RV32-NEXT: li a4, 48 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v16, v16, v8, v0.t +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size 
Folded Spill +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 48 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill @@ -4886,61 +4698,37 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t ; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a5, 24 -; RV32-NEXT: mul a3, a3, a5 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v8, (a4), zero +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v16, v8, v0.t +; RV32-NEXT: 
vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -4950,7 +4738,8 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: and a0, a0, a3 ; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload @@ -4972,18 +4761,20 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 24 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -4996,41 +4787,25 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb ; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v8, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 -; RV32-NEXT: add a0, sp, a0 -; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 40 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: li a1, 48 +; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload @@ -5038,21 +4813,20 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; 
RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload @@ -5078,9 +4852,9 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: li a2, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB70_2 ; RV64-NEXT: # %bb.1: @@ -5201,14 +4975,14 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: sw a2, 36(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: li a3, 16 -; RV32-NEXT: addi a1, a2, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: li a2, 16 ; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a3, .LBB71_2 +; RV32-NEXT: bltu a0, a2, .LBB71_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB71_2: @@ -5256,76 +5030,58 @@ define <32 x i64> @vp_ctlz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: vsrl.vi v0, v16, 8 ; RV32-NEXT: vor.vv v16, v16, v0 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vnot.v v0, v8 +; RV32-NEXT: vnot.v v8, v8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v16, 16 -; RV32-NEXT: vor.vv v16, v16, v8 +; RV32-NEXT: vsrl.vi v0, v16, 16 +; RV32-NEXT: vor.vv v16, v16, v0 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v0, 1 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vand.vv v0, v0, v24 +; RV32-NEXT: vsub.vv v0, v8, v0 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vor.vv v24, v16, v8 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v24, v0, v24 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v0, v16, a2 -; RV32-NEXT: vor.vv v16, v16, v0 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v24, v8 -; RV32-NEXT: vsrl.vi v24, v24, 2 -; RV32-NEXT: vand.vv v24, v24, v8 -; RV32-NEXT: vadd.vv v24, v0, v24 +; RV32-NEXT: vand.vv v16, v0, v8 +; RV32-NEXT: vsrl.vi v0, v0, 2 +; RV32-NEXT: vand.vv v0, v0, v8 +; RV32-NEXT: vadd.vv v16, v16, v0 ; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vnot.v v16, v16 -; RV32-NEXT: vsrl.vi v0, v16, 1 +; RV32-NEXT: vnot.v v24, v24 +; RV32-NEXT: vsrl.vi v0, v24, 1 ; 
RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: addi a2, sp, 24 -; RV32-NEXT: addi a3, sp, 16 -; RV32-NEXT: vsub.vv v0, v16, v0 -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vl8r.v v24, (a4) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vsub.vv v24, v24, v0 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v24, 4 -; RV32-NEXT: vadd.vv v16, v24, v16 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 3 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v0, v16, 4 +; RV32-NEXT: vadd.vv v16, v16, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v0, v8 -; RV32-NEXT: vsrl.vi v0, v0, 2 -; RV32-NEXT: vand.vv v8, v0, v8 +; RV32-NEXT: vand.vv v0, v24, v8 +; RV32-NEXT: vsrl.vi v24, v24, 2 +; RV32-NEXT: vand.vv v8, v24, v8 +; RV32-NEXT: addi a2, sp, 24 +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vadd.vv v8, v0, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a2), zero +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a3), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v0 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v0 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vmul.vv v16, v16, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll index a5a1061842427..dea0ebfd56946 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop-vp.ll @@ -1119,70 +1119,55 @@ declare <15 x i64> @llvm.vp.ctpop.v15i64(<15 x i64>, <15 x i1>, i32) define <15 x i64> @vp_ctpop_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, 
a1, 819 -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 32 +; RV32-NEXT: vand.vv v24, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v24, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v24, v24, v16, v0.t +; RV32-NEXT: vadd.vv v16, v24, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 48 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; @@ -1248,26 +1233,28 @@ define <15 x i64> @vp_ctpop_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a1), zero -; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v16, v0 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, 
e64, m8, ta, ma -; RV32-NEXT: vsub.vv v8, v8, v0 -; RV32-NEXT: vand.vv v0, v8, v24 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v0, v8 -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v0 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 @@ -1318,70 +1305,55 @@ declare <16 x i64> @llvm.vp.ctpop.v16i64(<16 x i64>, <16 x i1>, i32) define <16 x i64> @vp_ctpop_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_ctpop_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t -; RV32-NEXT: addi a1, sp, 32 +; RV32-NEXT: vand.vv v24, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v24, v0.t ; RV32-NEXT: vand.vv v24, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v24, v24, v16, v0.t +; RV32-NEXT: vadd.vv v16, v24, v16, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 
56 -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 48 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; @@ -1447,26 +1419,28 @@ define <16 x i64> @vp_ctpop_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a1), zero -; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v16, v0 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v8, v8, v0 -; RV32-NEXT: vand.vv v0, v8, v24 -; RV32-NEXT: vsrl.vi v8, v8, 2 -; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v0, v8 -; RV32-NEXT: vsrl.vi v0, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v0 ; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 @@ -1520,17 +1494,18 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 56 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 +; RV32-NEXT: vslidedown.vi v24, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a1, a1, 1365 @@ -1542,102 +1517,118 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sw a2, 36(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: li a3, 16 -; RV32-NEXT: addi a1, a2, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: li a2, 16 ; RV32-NEXT: mv a1, a0 -; RV32-NEXT: 
bltu a0, a3, .LBB34_2 +; RV32-NEXT: bltu a0, a2, .LBB34_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB34_2: -; RV32-NEXT: addi a2, sp, 40 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: addi a2, sp, 32 -; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 40 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a2), zero ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: li a3, 24 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v8, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: addi a2, sp, 32 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 +; RV32-NEXT: li a3, 40 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 5 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: add a2, sp, 
a2 +; RV32-NEXT: addi a2, a2, 48 +; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t +; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t +; RV32-NEXT: vadd.vv v8, v8, v16, v0.t ; RV32-NEXT: addi a2, sp, 24 -; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a2), zero -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 24 -; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: slli a2, a2, 5 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a2, sp, 16 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a2), zero ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 +; RV32-NEXT: slli a2, a2, 4 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 ; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill @@ -1645,51 +1636,83 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 +; RV32-NEXT: li a3, 48 +; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 40 +; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: addi a0, sp, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v24, v8, v0.t -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded 
Reload -; RV32-NEXT: vand.vv v24, v8, v16, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 40 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a2, 48 +; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a0, a0, a2 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 56 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 48 @@ -1710,9 +1733,9 @@ define <32 x i64> @vp_ctpop_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %ev ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: li a2, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: li a2, 16 ; RV64-NEXT: mv a1, a0 ; RV64-NEXT: bltu a0, a2, .LBB34_2 ; RV64-NEXT: # %bb.1: @@ -1792,12 +1815,9 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 4 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb -; RV32-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV32-NEXT: vmv8r.v v24, v16 +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a1, a1, 1365 @@ -1809,135 +1829,103 @@ define <32 x i64> @vp_ctpop_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: sw a2, 36(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: li a3, 16 -; RV32-NEXT: addi a1, a2, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: li a2, 16 ; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a3, .LBB35_2 +; 
RV32-NEXT: bltu a0, a2, .LBB35_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB35_2: ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v8, 1 +; RV32-NEXT: vsrl.vi v24, v8, 1 ; RV32-NEXT: addi a2, sp, 40 +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a2), zero +; RV32-NEXT: vlse64.v v8, (a2), zero ; RV32-NEXT: addi a2, a0, -16 ; RV32-NEXT: sltu a0, a0, a2 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a2 -; RV32-NEXT: addi a2, sp, 32 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v0, v16 +; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v0, v16, 1 +; RV32-NEXT: vand.vv v0, v0, v8 +; RV32-NEXT: addi a2, sp, 32 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v0, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vsub.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a2), zero -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vmv8r.v v8, v24 +; RV32-NEXT: vlse64.v v24, (a2), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v24, 1 -; RV32-NEXT: vand.vv v16, v24, v16 -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 3 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v16, v16, v0 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v16, v24, v16 +; RV32-NEXT: vand.vv v0, v8, v24 ; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v8, v8, v24 -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v16, v0 +; RV32-NEXT: vand.vv v0, v16, v24 ; RV32-NEXT: csrr a2, vlenb ; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v0, (a2) # Unknown-size Folded Spill ; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v8, v0 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v8, v8, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v0 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v8, v0 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: addi a2, sp, 48 +; RV32-NEXT: vl8r.v v24, 
(a2) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: addi a2, sp, 24 ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v8, v16 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a2), zero -; RV32-NEXT: addi a2, sp, 16 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v24, v24, v0 +; RV32-NEXT: vadd.vv v16, v24, v16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v0, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v0 -; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 -; RV32-NEXT: add a3, sp, a3 -; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a2), zero +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v24, 4 -; RV32-NEXT: vadd.vv v16, v24, v16 -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 4 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v24, v16, 4 +; RV32-NEXT: vadd.vv v16, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a2), zero +; RV32-NEXT: addi a2, sp, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v24, v8 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a2), zero ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v16, v24, v0 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v24, v8, v0 +; RV32-NEXT: vmul.vv v16, v16, v24 ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsrl.vx v8, v8, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v24, a2 +; RV32-NEXT: vsrl.vx v16, v16, a2 ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 48 ; RV32-NEXT: addi sp, sp, 48 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll index 4fbe67cfcd642..a39fc835f9d85 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ctpop.ll @@ -167,8 +167,6 @@ define void @ctpop_v2i64(ptr %x, ptr %y) { ; ; RV64-LABEL: ctpop_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: lui a1, 349525 ; RV64-NEXT: lui a2, 209715 ; RV64-NEXT: lui a3, 61681 @@ -185,6 +183,8 @@ define void @ctpop_v2i64(ptr %x, ptr %y) { ; RV64-NEXT: add a3, a3, a5 ; RV64-NEXT: slli a5, a4, 32 ; RV64-NEXT: add a4, a4, a5 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vsrl.vi v9, v8, 1 ; 
RV64-NEXT: vand.vx v9, v9, a1 ; RV64-NEXT: vsub.vv v8, v8, v9 @@ -473,8 +473,6 @@ define void @ctpop_v4i64(ptr %x, ptr %y) { ; ; RV64-LABEL: ctpop_v4i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: lui a1, 349525 ; RV64-NEXT: lui a2, 209715 ; RV64-NEXT: lui a3, 61681 @@ -491,6 +489,8 @@ define void @ctpop_v4i64(ptr %x, ptr %y) { ; RV64-NEXT: add a3, a3, a5 ; RV64-NEXT: slli a5, a4, 32 ; RV64-NEXT: add a4, a4, a5 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vsrl.vi v10, v8, 1 ; RV64-NEXT: vand.vx v10, v10, a1 ; RV64-NEXT: vsub.vv v8, v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll index 5f275da1740cb..093ddc36bf7f9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz-vp.ll @@ -1263,91 +1263,59 @@ declare <15 x i64> @llvm.vp.cttz.v15i64(<15 x i64>, i1 immarg, <15 x i1>, i32) define <15 x i64> @vp_cttz_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 32 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vand.vv v24, v8, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size 
Folded Reload -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vsub.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: vand.vv v24, v24, v8, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 48 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; @@ -1419,29 +1387,31 @@ define <15 x i64> @vp_cttz_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a1), zero -; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vsrl.vi v8, v16, 1 -; RV32-NEXT: vand.vv v0, v8, v0 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v16, v16, v0 -; RV32-NEXT: vand.vv v0, v16, v24 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v0, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v0 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 @@ -1495,91 +1465,59 @@ declare <16 x i64> 
@llvm.vp.cttz.v16i64(<16 x i64>, i1 immarg, <16 x i1>, i32) define <16 x i64> @vp_cttz_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v16i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 32 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vand.vv v24, v8, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vsub.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: vand.vv v24, v24, v8, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: li a1, 56 ; 
RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 48 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; @@ -1651,29 +1589,31 @@ define <16 x i64> @vp_cttz_v16i64_unmasked(<16 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a1), zero -; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vsrl.vi v8, v16, 1 -; RV32-NEXT: vand.vv v0, v8, v0 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v16, v16, v0 -; RV32-NEXT: vand.vv v0, v16, v24 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v0, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v0 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 @@ -1730,18 +1670,17 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 56 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 +; RV32-NEXT: vslidedown.vi v24, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a1, a1, 1365 @@ -1753,12 +1692,12 
@@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sw a2, 36(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: li a1, 16 -; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a2, 16(sp) ; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: li a1, 16 ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: bltu a0, a1, .LBB34_2 ; RV32-NEXT: # %bb.1: @@ -1771,95 +1710,116 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: li a5, 48 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v24, v16, v24, v0.t -; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, 
a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -1867,84 +1827,88 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v16, a1, v0.t -; RV32-NEXT: vnot.v v16, v16, v0.t -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; 
RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 56 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 48 @@ -1965,9 +1929,9 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; 
RV64-NEXT: li a1, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: mv a4, a0 ; RV64-NEXT: bltu a0, a1, .LBB34_2 ; RV64-NEXT: # %bb.1: @@ -2051,45 +2015,41 @@ define <32 x i64> @vp_cttz_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: sw a2, 32(sp) -; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: sw a2, 20(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: li a3, 16 -; RV32-NEXT: addi a1, a2, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a2, 0(sp) +; RV32-NEXT: sw a2, 4(sp) +; RV32-NEXT: li a2, 16 ; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a3, .LBB35_2 +; RV32-NEXT: bltu a0, a2, .LBB35_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB35_2: ; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vnot.v v0, v8 -; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: addi a3, a0, -16 ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v8, v8, a2 ; RV32-NEXT: vand.vv v8, v0, v8 @@ -2100,59 +2060,50 @@ define <32 x i64> @vp_cttz_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v0, v8, 1 ; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: vsub.vv v0, v8, v0 +; RV32-NEXT: vsub.vv v8, v8, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v16, 1 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsrl.vi v0, v16, 1 +; RV32-NEXT: vand.vv v24, v0, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: addi a2, sp, 24 -; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vlse64.v v0, (a3), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vv v16, v16, v24 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v0, v8 -; RV32-NEXT: vsrl.vi v0, v0, 2 -; RV32-NEXT: vand.vv v0, v0, v8 -; RV32-NEXT: vadd.vv v24, v24, v0 +; RV32-NEXT: vand.vv v24, v8, v0 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v16, v8 +; RV32-NEXT: vand.vv v24, v16, v0 ; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v16, v16, v0 ; RV32-NEXT: vsetvli zero, a1, e64, 
m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v24, 4 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 8 +; RV32-NEXT: mv a3, sp +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v16, v24, v16 -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v16, 4 +; RV32-NEXT: vadd.vv v16, v16, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a2), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v0, v8 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v16, v16, v0 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v24, v8, v0 +; RV32-NEXT: vmul.vv v16, v16, v24 ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsrl.vx v8, v8, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v24, a2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 48 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: vsrl.vx v16, v16, a2 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; @@ -3460,91 +3411,59 @@ define <8 x i64> @vp_cttz_zero_undef_v8i64_unmasked(<8 x i64> %va, i32 zeroext % define <15 x i64> @vp_cttz_zero_undef_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v15i64: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, 
a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 32 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vand.vv v24, v8, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vsub.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: vand.vv v24, v24, v8, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 48 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; @@ -3616,29 +3535,31 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a1), zero -; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vsrl.vi v8, v16, 1 -; RV32-NEXT: vand.vv v0, v8, v0 +; RV32-NEXT: 
vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v16, v16, v0 -; RV32-NEXT: vand.vv v0, v16, v24 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v0, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v0 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 @@ -3689,92 +3610,60 @@ define <15 x i64> @vp_cttz_zero_undef_v15i64_unmasked(<15 x i64> %va, i32 zeroex define <16 x i64> @vp_cttz_zero_undef_v16i64(<16 x i64> %va, <16 x i1> %m, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v16i64: -; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 4 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 16 * vlenb +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: li a1, 1 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v16, v8, a1, v0.t ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 209715 ; RV32-NEXT: addi a1, a1, 819 -; RV32-NEXT: sw a1, 32(sp) -; RV32-NEXT: sw a1, 36(sp) +; RV32-NEXT: sw a1, 16(sp) +; RV32-NEXT: sw a1, 20(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: sw a1, 28(sp) +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: lui a1, 4112 ; RV32-NEXT: addi a1, a1, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) -; RV32-NEXT: addi a1, sp, 40 +; RV32-NEXT: sw a1, 0(sp) +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 32 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: vsrl.vi v8, v16, 1, v0.t +; RV32-NEXT: vand.vv v24, v8, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v24, (a1), zero -; RV32-NEXT: addi a1, sp, 48 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: addi a1, sp, 24 -; RV32-NEXT: vsub.vv v8, v16, v8, v0.t -; RV32-NEXT: addi a2, 
sp, 48 -; RV32-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v8, v24, v0.t -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v24, v8, v24, v0.t +; RV32-NEXT: vsub.vv v24, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsrl.vi v24, v24, 2, v0.t +; RV32-NEXT: vand.vv v24, v24, v8, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v24, v16, v24, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t +; RV32-NEXT: vsrl.vi v24, v16, 4, v0.t +; RV32-NEXT: vadd.vv v16, v16, v24, v0.t ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v16, (a1), zero +; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: li a1, 56 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v24, 4, v0.t -; RV32-NEXT: vadd.vv v8, v24, v8, v0.t -; RV32-NEXT: li a0, 56 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 48 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v8, v8, v24, v0.t -; RV32-NEXT: vmul.vv v8, v8, v16, v0.t -; RV32-NEXT: vsrl.vx v8, v8, a0, v0.t -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 48 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: vand.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v24, v0.t +; RV32-NEXT: vsrl.vx v8, v8, a1, v0.t +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; @@ -3846,29 +3735,31 @@ define <16 x i64> @vp_cttz_zero_undef_v16i64_unmasked(<16 x i64> %va, i32 zeroex ; RV32-NEXT: sw a1, 4(sp) ; RV32-NEXT: addi a1, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a1), zero -; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vlse64.v v24, (a1), zero +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vsrl.vi v16, v8, 1 +; RV32-NEXT: vand.vv v24, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: addi a1, sp, 8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v8, v16 -; RV32-NEXT: vsrl.vi v8, v16, 1 -; RV32-NEXT: vand.vv v0, v8, v0 +; RV32-NEXT: vsub.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v8, v16 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: mv a1, sp ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vv v16, v16, v0 -; RV32-NEXT: vand.vv v0, v16, v24 -; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vadd.vv v8, v24, v8 +; RV32-NEXT: vsrl.vi v24, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a1), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v16, v0, v16 -; RV32-NEXT: vsrl.vi v0, v16, 4 -; RV32-NEXT: vadd.vv v16, v16, v0 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v8, v8, v16 ; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: li a0, 56 ; RV32-NEXT: vsrl.vx v8, v8, a0 @@ 
-3923,18 +3814,17 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: addi sp, sp, -48 ; RV32-NEXT: .cfi_def_cfa_offset 48 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 56 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 48 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x38, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 56 * vlenb ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 24 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 5 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 48 ; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v7, v0, 2 +; RV32-NEXT: vslidedown.vi v24, v0, 2 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a1, a1, 1365 @@ -3946,12 +3836,12 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sw a2, 36(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a1, a1, -241 +; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a1, 24(sp) ; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: li a1, 16 -; RV32-NEXT: addi a2, a2, 257 ; RV32-NEXT: sw a2, 16(sp) ; RV32-NEXT: sw a2, 20(sp) +; RV32-NEXT: li a1, 16 ; RV32-NEXT: mv a2, a0 ; RV32-NEXT: bltu a0, a1, .LBB70_2 ; RV32-NEXT: # %bb.1: @@ -3964,95 +3854,116 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: vnot.v v8, v8, v0.t ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: slli a4, a4, 5 +; RV32-NEXT: li a5, 48 +; RV32-NEXT: mul a4, a4, a5 ; RV32-NEXT: add a4, sp, a4 ; RV32-NEXT: addi a4, a4, 48 ; RV32-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 40 +; RV32-NEXT: li a4, 24 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill -; RV32-NEXT: addi a3, sp, 32 -; RV32-NEXT: vlse64.v v8, (a3), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 4 +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vsrl.vi v16, v16, 1, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v16, 1, v0.t +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: li a4, 40 ; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v24, v24, v16, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, 
v16, v0.t +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v8, (a3), zero +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 48 +; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: add a3, sp, a3 +; RV32-NEXT: addi a3, a3, 48 +; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: csrr a3, vlenb +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vsub.vv v24, v16, v24, v0.t -; RV32-NEXT: vand.vv v16, v24, v8, v0.t +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vsrl.vi v16, v24, 2, v0.t -; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; RV32-NEXT: vadd.vv v8, v8, v16, v0.t +; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vsrl.vi v16, v16, 2, v0.t +; RV32-NEXT: vand.vv v16, v16, v8, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vadd.vv v16, v8, v16, v0.t +; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t +; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: addi a3, sp, 24 -; RV32-NEXT: addi a4, sp, 16 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a3), zero -; RV32-NEXT: addi a3, sp, 48 -; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV32-NEXT: vlse64.v v8, (a4), zero ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 5 +; RV32-NEXT: li a4, 40 +; RV32-NEXT: mul a3, a3, a4 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v16, (a3), zero ; RV32-NEXT: csrr a3, vlenb ; RV32-NEXT: slli a3, a3, 3 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v24, v8, 4, v0.t -; RV32-NEXT: vadd.vv v8, v8, v24, v0.t -; RV32-NEXT: vand.vv v16, v8, v16, v0.t -; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: slli a2, a2, 5 -; RV32-NEXT: add a2, sp, a2 -; RV32-NEXT: addi a2, a2, 48 -; RV32-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload -; RV32-NEXT: vmul.vv v8, v16, v8, v0.t +; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsrl.vx v8, v8, a2, v0.t ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: slli a3, a3, 3 +; RV32-NEXT: slli a3, a3, 4 ; RV32-NEXT: add a3, sp, a3 
; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill @@ -4060,84 +3971,88 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vmv1r.v v0, v24 ; RV32-NEXT: csrr a3, vlenb -; RV32-NEXT: li a4, 24 -; RV32-NEXT: mul a3, a3, a4 +; RV32-NEXT: slli a3, a3, 5 ; RV32-NEXT: add a3, sp, a3 ; RV32-NEXT: addi a3, a3, 48 -; RV32-NEXT: vl8r.v v16, (a3) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsub.vx v8, v16, a1, v0.t -; RV32-NEXT: vnot.v v16, v16, v0.t -; RV32-NEXT: vand.vv v8, v16, v8, v0.t -; RV32-NEXT: vsrl.vi v24, v8, 1, v0.t +; RV32-NEXT: vsub.vx v16, v8, a1, v0.t +; RV32-NEXT: vnot.v v8, v8, v0.t +; RV32-NEXT: vand.vv v8, v8, v16, v0.t +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 1, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v24, v16, v0.t -; RV32-NEXT: vsub.vv v8, v8, v16, v0.t +; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 24 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 4 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v16, v8, v16, v0.t +; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vsub.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vand.vv v16, v16, v24, v0.t +; RV32-NEXT: vand.vv v16, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 40 +; RV32-NEXT: li a1, 48 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 -; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t -; RV32-NEXT: vand.vv v8, v8, v24, v0.t +; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 24 -; RV32-NEXT: mul a0, a0, a1 +; RV32-NEXT: slli a0, a0, 5 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vadd.vv v8, v16, v8, v0.t ; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t ; RV32-NEXT: vadd.vv v8, v8, v16, v0.t -; RV32-NEXT: addi a0, sp, 48 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: li a1, 40 +; RV32-NEXT: mul a0, 
a0, a1 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vand.vv v8, v8, v16, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 5 +; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV32-NEXT: vmul.vv v8, v8, v16, v0.t ; RV32-NEXT: vsrl.vx v16, v8, a2, v0.t ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 +; RV32-NEXT: slli a0, a0, 4 ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 48 ; RV32-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 48 +; RV32-NEXT: li a1, 56 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 48 @@ -4158,9 +4073,9 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: li a1, 16 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v24, v0, 2 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: mv a4, a0 ; RV64-NEXT: bltu a0, a1, .LBB70_2 ; RV64-NEXT: # %bb.1: @@ -4244,45 +4159,41 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64(<32 x i64> %va, <32 x i1> %m, i32 z define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroext %evl) { ; RV32-LABEL: vp_cttz_zero_undef_v32i64_unmasked: ; RV32: # %bb.0: -; RV32-NEXT: addi sp, sp, -48 -; RV32-NEXT: .cfi_def_cfa_offset 48 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 3 -; RV32-NEXT: sub sp, sp, a1 -; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: .cfi_def_cfa_offset 32 ; RV32-NEXT: lui a1, 349525 ; RV32-NEXT: lui a2, 209715 ; RV32-NEXT: addi a1, a1, 1365 -; RV32-NEXT: sw a1, 40(sp) -; RV32-NEXT: sw a1, 44(sp) +; RV32-NEXT: sw a1, 24(sp) +; RV32-NEXT: sw a1, 28(sp) ; RV32-NEXT: lui a1, 61681 ; RV32-NEXT: addi a2, a2, 819 -; RV32-NEXT: sw a2, 32(sp) -; RV32-NEXT: sw a2, 36(sp) +; RV32-NEXT: sw a2, 16(sp) +; RV32-NEXT: sw a2, 20(sp) ; RV32-NEXT: lui a2, 4112 ; RV32-NEXT: addi a1, a1, -241 -; RV32-NEXT: sw a1, 24(sp) -; RV32-NEXT: sw a1, 28(sp) -; RV32-NEXT: li a3, 16 -; RV32-NEXT: addi a1, a2, 257 -; RV32-NEXT: sw a1, 16(sp) -; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: addi a2, a2, 257 +; RV32-NEXT: sw a1, 8(sp) +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: sw a2, 0(sp) +; RV32-NEXT: sw a2, 4(sp) +; RV32-NEXT: li a2, 16 ; RV32-NEXT: mv a1, a0 -; RV32-NEXT: bltu a0, a3, .LBB71_2 +; RV32-NEXT: bltu a0, a2, .LBB71_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a1, 16 ; RV32-NEXT: .LBB71_2: ; RV32-NEXT: li a2, 1 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vnot.v v0, v8 -; RV32-NEXT: addi a3, sp, 40 +; RV32-NEXT: addi a3, sp, 24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: addi a3, a0, -16 ; RV32-NEXT: sltu a0, a0, a3 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a3 -; RV32-NEXT: addi a3, sp, 32 +; RV32-NEXT: addi a3, sp, 16 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsub.vx v8, v8, a2 ; RV32-NEXT: vand.vv v8, v0, v8 @@ -4293,59 +4204,50 @@ define <32 x i64> @vp_cttz_zero_undef_v32i64_unmasked(<32 x i64> %va, i32 zeroex ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsrl.vi v0, v8, 1 ; RV32-NEXT: vand.vv v0, v0, v24 -; RV32-NEXT: vsub.vv v0, 
v8, v0 +; RV32-NEXT: vsub.vv v8, v8, v0 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v8, v16, 1 -; RV32-NEXT: vand.vv v24, v8, v24 +; RV32-NEXT: vsrl.vi v0, v16, 1 +; RV32-NEXT: vand.vv v24, v0, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a3), zero -; RV32-NEXT: addi a2, sp, 24 -; RV32-NEXT: addi a3, sp, 16 +; RV32-NEXT: vlse64.v v0, (a3), zero ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsub.vv v16, v16, v24 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v24, v0, v8 -; RV32-NEXT: vsrl.vi v0, v0, 2 -; RV32-NEXT: vand.vv v0, v0, v8 -; RV32-NEXT: vadd.vv v24, v24, v0 +; RV32-NEXT: vand.vv v24, v8, v0 +; RV32-NEXT: vsrl.vi v8, v8, 2 +; RV32-NEXT: vand.vv v8, v8, v0 +; RV32-NEXT: vadd.vv v8, v24, v8 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vand.vv v0, v16, v8 +; RV32-NEXT: vand.vv v24, v16, v0 ; RV32-NEXT: vsrl.vi v16, v16, 2 -; RV32-NEXT: vand.vv v8, v16, v8 +; RV32-NEXT: vand.vv v16, v16, v0 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v24, 4 +; RV32-NEXT: vsrl.vi v0, v8, 4 +; RV32-NEXT: vadd.vv v8, v8, v0 +; RV32-NEXT: addi a2, sp, 8 +; RV32-NEXT: mv a3, sp +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vadd.vv v16, v24, v16 -; RV32-NEXT: addi a4, sp, 48 -; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vsrl.vi v24, v16, 4 +; RV32-NEXT: vadd.vv v16, v16, v24 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v24, (a2), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v0, v8 -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v0, (a3), zero -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vi v16, v8, 4 -; RV32-NEXT: vadd.vv v8, v8, v16 -; RV32-NEXT: addi a2, sp, 48 -; RV32-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vand.vv v16, v16, v24 -; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV32-NEXT: vand.vv v16, v16, v24 +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vlse64.v v24, (a3), zero ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v16, v16, v0 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vmul.vv v24, v8, v0 +; RV32-NEXT: vmul.vv v16, v16, v24 ; RV32-NEXT: li a2, 56 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v8, v16, a2 +; RV32-NEXT: vsrl.vx v8, v8, a2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; RV32-NEXT: vsrl.vx v16, v24, a2 -; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: slli a0, a0, 3 -; RV32-NEXT: add sp, sp, a0 -; RV32-NEXT: .cfi_def_cfa sp, 48 -; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: vsrl.vx v16, v16, a2 +; RV32-NEXT: addi sp, sp, 32 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll index 57e0eeb92ee2f..ddf92af2312cc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-cttz.ll @@ -45,9 +45,9 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind { ; RVF-NEXT: vnsrl.wi v10, v12, 23 ; RVF-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RVF-NEXT: vnsrl.wi v9, v10, 0 +; RVF-NEXT: vsub.vx v9, v9, a1 ; RVF-NEXT: vmseq.vi v0, v8, 0 -; RVF-NEXT: vsub.vx v8, v9, a1 -; RVF-NEXT: vmerge.vim v8, 
v8, 8, v0 +; RVF-NEXT: vmerge.vim v8, v9, 8, v0 ; RVF-NEXT: vse8.v v8, (a0) ; RVF-NEXT: ret ; @@ -64,9 +64,9 @@ define void @cttz_v16i8(ptr %x, ptr %y) nounwind { ; RVD-NEXT: vnsrl.wi v10, v12, 23 ; RVD-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RVD-NEXT: vnsrl.wi v9, v10, 0 +; RVD-NEXT: vsub.vx v9, v9, a1 ; RVD-NEXT: vmseq.vi v0, v8, 0 -; RVD-NEXT: vsub.vx v8, v9, a1 -; RVD-NEXT: vmerge.vim v8, v8, 8, v0 +; RVD-NEXT: vmerge.vim v8, v9, 8, v0 ; RVD-NEXT: vse8.v v8, (a0) ; RVD-NEXT: ret ; @@ -390,10 +390,10 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind { ; RVI-LABEL: cttz_v32i8: ; RVI: # %bb.0: ; RVI-NEXT: li a1, 32 +; RVI-NEXT: li a2, 1 ; RVI-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; RVI-NEXT: vle8.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 +; RVI-NEXT: vsub.vx v10, v8, a2 ; RVI-NEXT: li a1, 85 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 @@ -425,9 +425,9 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind { ; RVF-NEXT: vnsrl.wi v12, v16, 23 ; RVF-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RVF-NEXT: vnsrl.wi v10, v12, 0 +; RVF-NEXT: vsub.vx v10, v10, a1 ; RVF-NEXT: vmseq.vi v0, v8, 0 -; RVF-NEXT: vsub.vx v8, v10, a1 -; RVF-NEXT: vmerge.vim v8, v8, 8, v0 +; RVF-NEXT: vmerge.vim v8, v10, 8, v0 ; RVF-NEXT: vse8.v v8, (a0) ; RVF-NEXT: ret ; @@ -445,9 +445,9 @@ define void @cttz_v32i8(ptr %x, ptr %y) nounwind { ; RVD-NEXT: vnsrl.wi v12, v16, 23 ; RVD-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; RVD-NEXT: vnsrl.wi v10, v12, 0 +; RVD-NEXT: vsub.vx v10, v10, a1 ; RVD-NEXT: vmseq.vi v0, v8, 0 -; RVD-NEXT: vsub.vx v8, v10, a1 -; RVD-NEXT: vmerge.vim v8, v8, 8, v0 +; RVD-NEXT: vmerge.vim v8, v10, 8, v0 ; RVD-NEXT: vse8.v v8, (a0) ; RVD-NEXT: ret ; @@ -1121,10 +1121,10 @@ define void @cttz_zero_undef_v32i8(ptr %x, ptr %y) nounwind { ; RVI-LABEL: cttz_zero_undef_v32i8: ; RVI: # %bb.0: ; RVI-NEXT: li a1, 32 +; RVI-NEXT: li a2, 1 ; RVI-NEXT: vsetvli zero, a1, e8, m2, ta, ma ; RVI-NEXT: vle8.v v8, (a0) -; RVI-NEXT: li a1, 1 -; RVI-NEXT: vsub.vx v10, v8, a1 +; RVI-NEXT: vsub.vx v10, v8, a2 ; RVI-NEXT: li a1, 85 ; RVI-NEXT: vnot.v v8, v8 ; RVI-NEXT: vand.vv v8, v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll index b4634dbf5a5e8..b611fcd9ddb33 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll @@ -22,10 +22,10 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) { ; CHECK-NEXT: vmv.s.x v9, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vadd.vi v12, v11, -16 +; CHECK-NEXT: vadd.vi v11, v11, -15 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 2 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vadd.vi v11, v11, -15 ; CHECK-NEXT: vmerge.vim v13, v10, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll index e13f4f4b50b0f..76e1ae0a69c24 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-elen.ll @@ -26,26 +26,26 @@ define void @add_v4i32(ptr %x, ptr %y) { define void @add_v2i64(ptr %x, ptr %y) { ; RV32-LABEL: add_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: lw a2, 0(a1) -; RV32-NEXT: lw a3, 4(a1) -; RV32-NEXT: lw a4, 0(a0) -; RV32-NEXT: lw a5, 4(a0) -; RV32-NEXT: lw a6, 8(a0) -; RV32-NEXT: lw a7, 12(a0) 
-; RV32-NEXT: lw t0, 12(a1) -; RV32-NEXT: lw a1, 8(a1) -; RV32-NEXT: add a3, a5, a3 -; RV32-NEXT: add a2, a4, a2 -; RV32-NEXT: add a7, a7, t0 -; RV32-NEXT: add a1, a6, a1 -; RV32-NEXT: sltu a4, a2, a4 -; RV32-NEXT: sltu a5, a1, a6 -; RV32-NEXT: add a3, a3, a4 -; RV32-NEXT: add a5, a7, a5 -; RV32-NEXT: sw a2, 0(a0) -; RV32-NEXT: sw a3, 4(a0) -; RV32-NEXT: sw a1, 8(a0) -; RV32-NEXT: sw a5, 12(a0) +; RV32-NEXT: lw a2, 0(a0) +; RV32-NEXT: lw a3, 4(a0) +; RV32-NEXT: lw a4, 8(a0) +; RV32-NEXT: lw a5, 12(a0) +; RV32-NEXT: lw a6, 0(a1) +; RV32-NEXT: lw a7, 4(a1) +; RV32-NEXT: lw t0, 8(a1) +; RV32-NEXT: lw a1, 12(a1) +; RV32-NEXT: add a3, a3, a7 +; RV32-NEXT: add a6, a2, a6 +; RV32-NEXT: add a1, a5, a1 +; RV32-NEXT: add t0, a4, t0 +; RV32-NEXT: sltu a2, a6, a2 +; RV32-NEXT: sltu a4, t0, a4 +; RV32-NEXT: add a2, a3, a2 +; RV32-NEXT: add a1, a1, a4 +; RV32-NEXT: sw a6, 0(a0) +; RV32-NEXT: sw a2, 4(a0) +; RV32-NEXT: sw t0, 8(a0) +; RV32-NEXT: sw a1, 12(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: add_v2i64: @@ -89,14 +89,14 @@ define void @add_v1i64(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: lw a2, 0(a0) ; RV32-NEXT: lw a3, 4(a0) -; RV32-NEXT: lw a4, 4(a1) -; RV32-NEXT: lw a1, 0(a1) -; RV32-NEXT: add a3, a3, a4 -; RV32-NEXT: add a1, a2, a1 -; RV32-NEXT: sltu a2, a1, a2 -; RV32-NEXT: add a2, a3, a2 -; RV32-NEXT: sw a1, 0(a0) -; RV32-NEXT: sw a2, 4(a0) +; RV32-NEXT: lw a4, 0(a1) +; RV32-NEXT: lw a1, 4(a1) +; RV32-NEXT: add a1, a3, a1 +; RV32-NEXT: add a4, a2, a4 +; RV32-NEXT: sltu a2, a4, a2 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: sw a4, 0(a0) +; RV32-NEXT: sw a1, 4(a0) ; RV32-NEXT: ret ; ; RV64-LABEL: add_v1i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll index bb2b57fbcc3b7..54489765cff1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extload-truncstore.ll @@ -308,9 +308,9 @@ define void @truncstore_v2i8_v2i1(<2 x i8> %x, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vand.vi v8, v8, 1 +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll index e53876d69b59b..b350268a3c10c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-i1.ll @@ -10,9 +10,9 @@ define i1 @extractelt_v1i1(ptr %x, i64 %idx) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmv.s.x v9, zero ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmv.s.x v8, zero -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -27,9 +27,9 @@ define i1 @extractelt_v2i1(ptr %x, i64 %idx) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, 
v8 ; CHECK-NEXT: ret @@ -44,9 +44,9 @@ define i1 @extractelt_v4i1(ptr %x, i64 %idx) nounwind { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vmseq.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vslidedown.vx v8, v8, a1 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret @@ -328,13 +328,13 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV32-NEXT: mv a2, sp ; RV32-NEXT: li a3, 128 ; RV32-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; RV32-NEXT: vle8.v v8, (a0) -; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: vle8.v v16, (a0) +; RV32-NEXT: addi a0, a0, 128 ; RV32-NEXT: add a1, a2, a1 -; RV32-NEXT: vmseq.vi v0, v8, 0 +; RV32-NEXT: vle8.v v24, (a0) +; RV32-NEXT: vmseq.vi v8, v24, 0 ; RV32-NEXT: vmv.v.i v24, 0 -; RV32-NEXT: vmseq.vi v8, v16, 0 +; RV32-NEXT: vmseq.vi v0, v16, 0 ; RV32-NEXT: vmerge.vim v16, v24, 1, v0 ; RV32-NEXT: vse8.v v16, (a2) ; RV32-NEXT: vmv1r.v v0, v8 @@ -359,13 +359,13 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV64-NEXT: mv a2, sp ; RV64-NEXT: li a3, 128 ; RV64-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; RV64-NEXT: vle8.v v8, (a0) -; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: vle8.v v16, (a0) +; RV64-NEXT: addi a0, a0, 128 ; RV64-NEXT: add a1, a2, a1 -; RV64-NEXT: vmseq.vi v0, v8, 0 +; RV64-NEXT: vle8.v v24, (a0) +; RV64-NEXT: vmseq.vi v8, v24, 0 ; RV64-NEXT: vmv.v.i v24, 0 -; RV64-NEXT: vmseq.vi v8, v16, 0 +; RV64-NEXT: vmseq.vi v0, v16, 0 ; RV64-NEXT: vmerge.vim v16, v24, 1, v0 ; RV64-NEXT: vse8.v v16, (a2) ; RV64-NEXT: vmv1r.v v0, v8 @@ -390,13 +390,13 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV32ZBS-NEXT: mv a2, sp ; RV32ZBS-NEXT: li a3, 128 ; RV32ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; RV32ZBS-NEXT: vle8.v v8, (a0) -; RV32ZBS-NEXT: addi a0, a0, 128 ; RV32ZBS-NEXT: vle8.v v16, (a0) +; RV32ZBS-NEXT: addi a0, a0, 128 ; RV32ZBS-NEXT: add a1, a2, a1 -; RV32ZBS-NEXT: vmseq.vi v0, v8, 0 +; RV32ZBS-NEXT: vle8.v v24, (a0) +; RV32ZBS-NEXT: vmseq.vi v8, v24, 0 ; RV32ZBS-NEXT: vmv.v.i v24, 0 -; RV32ZBS-NEXT: vmseq.vi v8, v16, 0 +; RV32ZBS-NEXT: vmseq.vi v0, v16, 0 ; RV32ZBS-NEXT: vmerge.vim v16, v24, 1, v0 ; RV32ZBS-NEXT: vse8.v v16, (a2) ; RV32ZBS-NEXT: vmv1r.v v0, v8 @@ -421,13 +421,13 @@ define i1 @extractelt_v256i1(ptr %x, i64 %idx) nounwind { ; RV64ZBS-NEXT: mv a2, sp ; RV64ZBS-NEXT: li a3, 128 ; RV64ZBS-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; RV64ZBS-NEXT: vle8.v v8, (a0) -; RV64ZBS-NEXT: addi a0, a0, 128 ; RV64ZBS-NEXT: vle8.v v16, (a0) +; RV64ZBS-NEXT: addi a0, a0, 128 ; RV64ZBS-NEXT: add a1, a2, a1 -; RV64ZBS-NEXT: vmseq.vi v0, v8, 0 +; RV64ZBS-NEXT: vle8.v v24, (a0) +; RV64ZBS-NEXT: vmseq.vi v8, v24, 0 ; RV64ZBS-NEXT: vmv.v.i v24, 0 -; RV64ZBS-NEXT: vmseq.vi v8, v16, 0 +; RV64ZBS-NEXT: vmseq.vi v0, v16, 0 ; RV64ZBS-NEXT: vmerge.vim v16, v24, 1, v0 ; RV64ZBS-NEXT: vse8.v v16, (a2) ; RV64ZBS-NEXT: vmv1r.v v0, v8 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll index e9dca2c42e835..c7370102be738 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract-subvector.ll @@ -560,12 +560,13 @@ define void @extract_v2i1_v64i1_2(ptr %x, ptr %y) { ; VLA-NEXT: vlm.v v0, (a0) ; VLA-NEXT: vmv.v.i v8, 0 ; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLA-NEXT: 
vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, m1, ta, ma ; VLA-NEXT: vslidedown.vi v8, v8, 2 ; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLA-NEXT: vmsne.vi v0, v8, 0 -; VLA-NEXT: vmv.v.i v8, 0 -; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vmerge.vim v8, v9, 1, v0 ; VLA-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -581,12 +582,13 @@ define void @extract_v2i1_v64i1_2(ptr %x, ptr %y) { ; VLS-NEXT: vlm.v v0, (a0) ; VLS-NEXT: vmv.v.i v8, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 +; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLS-NEXT: vmv.v.i v9, 0 ; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma ; VLS-NEXT: vslidedown.vi v8, v8, 2 ; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLS-NEXT: vmsne.vi v0, v8, 0 -; VLS-NEXT: vmv.v.i v8, 0 -; VLS-NEXT: vmerge.vim v8, v8, 1, v0 +; VLS-NEXT: vmerge.vim v8, v9, 1, v0 ; VLS-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLS-NEXT: vmv.v.i v9, 0 ; VLS-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -610,12 +612,13 @@ define void @extract_v2i1_v64i1_42(ptr %x, ptr %y) { ; VLA-NEXT: li a0, 42 ; VLA-NEXT: vmv.v.i v8, 0 ; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLA-NEXT: vmv.v.i v12, 0 ; VLA-NEXT: vsetivli zero, 2, e8, m4, ta, ma ; VLA-NEXT: vslidedown.vx v8, v8, a0 ; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLA-NEXT: vmsne.vi v0, v8, 0 -; VLA-NEXT: vmv.v.i v8, 0 -; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vmerge.vim v8, v12, 1, v0 ; VLA-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -631,11 +634,12 @@ define void @extract_v2i1_v64i1_42(ptr %x, ptr %y) { ; VLS-NEXT: vlm.v v0, (a0) ; VLS-NEXT: vmv.v.i v8, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 -; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma -; VLS-NEXT: vslidedown.vi v8, v10, 10 ; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; VLS-NEXT: vmsne.vi v0, v8, 0 ; VLS-NEXT: vmv.v.i v8, 0 +; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; VLS-NEXT: vslidedown.vi v9, v10, 10 +; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLS-NEXT: vmsne.vi v0, v9, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 ; VLS-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLS-NEXT: vmv.v.i v9, 0 @@ -676,12 +680,13 @@ define void @extract_v2i1_nxv2i1_2( %x, ptr %y) { ; VLA-NEXT: vsetvli a1, zero, e8, mf4, ta, ma ; VLA-NEXT: vmv.v.i v8, 0 ; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; VLA-NEXT: vslidedown.vi v8, v8, 2 ; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLA-NEXT: vmsne.vi v0, v8, 0 -; VLA-NEXT: vmv.v.i v8, 0 -; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vmerge.vim v8, v9, 1, v0 ; VLA-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -696,12 +701,13 @@ define void @extract_v2i1_nxv2i1_2( %x, ptr %y) { ; VLS-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; VLS-NEXT: vmv.v.i v8, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 +; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLS-NEXT: vmv.v.i v9, 0 ; VLS-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; VLS-NEXT: vslidedown.vi v8, v8, 2 ; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLS-NEXT: vmsne.vi v0, v8, 0 -; VLS-NEXT: vmv.v.i v8, 0 -; VLS-NEXT: vmerge.vim v8, v8, 1, v0 +; VLS-NEXT: vmerge.vim v8, v9, 1, v0 ; VLS-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLS-NEXT: vmv.v.i v9, 0 ; VLS-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -740,12 +746,13 @@ define 
void @extract_v2i1_nxv64i1_2( %x, ptr %y) { ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, m1, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 2 ; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vmsne.vi v0, v8, 0 -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -766,12 +773,13 @@ define void @extract_v2i1_nxv64i1_42( %x, ptr %y) { ; VLA-NEXT: vmv.v.i v8, 0 ; VLA-NEXT: li a1, 42 ; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLA-NEXT: vmv.v.i v12, 0 ; VLA-NEXT: vsetivli zero, 2, e8, m4, ta, ma ; VLA-NEXT: vslidedown.vx v8, v8, a1 ; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLA-NEXT: vmsne.vi v0, v8, 0 -; VLA-NEXT: vmv.v.i v8, 0 -; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vmerge.vim v8, v12, 1, v0 ; VLA-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -786,11 +794,12 @@ define void @extract_v2i1_nxv64i1_42( %x, ptr %y) { ; VLS-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; VLS-NEXT: vmv.v.i v8, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 -; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma -; VLS-NEXT: vslidedown.vi v8, v10, 10 ; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; VLS-NEXT: vmsne.vi v0, v8, 0 ; VLS-NEXT: vmv.v.i v8, 0 +; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; VLS-NEXT: vslidedown.vi v9, v10, 10 +; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLS-NEXT: vmsne.vi v0, v9, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 ; VLS-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLS-NEXT: vmv.v.i v9, 0 @@ -811,12 +820,13 @@ define void @extract_v2i1_nxv32i1_26( %x, ptr %y) { ; VLA-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; VLA-NEXT: vmv.v.i v8, 0 ; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLA-NEXT: vmv.v.i v10, 0 ; VLA-NEXT: vsetivli zero, 2, e8, m2, ta, ma ; VLA-NEXT: vslidedown.vi v8, v8, 26 ; VLA-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; VLA-NEXT: vmsne.vi v0, v8, 0 -; VLA-NEXT: vmv.v.i v8, 0 -; VLA-NEXT: vmerge.vim v8, v8, 1, v0 +; VLA-NEXT: vmerge.vim v8, v10, 1, v0 ; VLA-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLA-NEXT: vmv.v.i v9, 0 ; VLA-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -831,11 +841,12 @@ define void @extract_v2i1_nxv32i1_26( %x, ptr %y) { ; VLS-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; VLS-NEXT: vmv.v.i v8, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 -; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma -; VLS-NEXT: vslidedown.vi v8, v9, 10 ; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma -; VLS-NEXT: vmsne.vi v0, v8, 0 ; VLS-NEXT: vmv.v.i v8, 0 +; VLS-NEXT: vsetivli zero, 2, e8, m1, ta, ma +; VLS-NEXT: vslidedown.vi v9, v9, 10 +; VLS-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; VLS-NEXT: vmsne.vi v0, v9, 0 ; VLS-NEXT: vmerge.vim v8, v8, 1, v0 ; VLS-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; VLS-NEXT: vmv.v.i v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll index 7e45136372b6c..f613449856e09 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -626,11 +626,11 @@ define i32 @extractelt_v32i32_idx(ptr %x, i32 zeroext %idx) nounwind { ; 
RV32NOM-NEXT: andi a0, a1, 31 ; RV32NOM-NEXT: li a1, 4 ; RV32NOM-NEXT: call __mulsi3 -; RV32NOM-NEXT: li a1, 32 -; RV32NOM-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV32NOM-NEXT: vle32.v v8, (s2) ; RV32NOM-NEXT: mv a1, sp +; RV32NOM-NEXT: li a2, 32 ; RV32NOM-NEXT: add a0, a1, a0 +; RV32NOM-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV32NOM-NEXT: vle32.v v8, (s2) ; RV32NOM-NEXT: vadd.vv v8, v8, v8 ; RV32NOM-NEXT: vse32.v v8, (a1) ; RV32NOM-NEXT: lw a0, 0(a0) @@ -649,14 +649,14 @@ define i32 @extractelt_v32i32_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV32M-NEXT: addi s0, sp, 256 ; RV32M-NEXT: andi sp, sp, -128 ; RV32M-NEXT: andi a1, a1, 31 -; RV32M-NEXT: li a2, 32 -; RV32M-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32M-NEXT: vle32.v v8, (a0) +; RV32M-NEXT: mv a2, sp +; RV32M-NEXT: li a3, 32 ; RV32M-NEXT: slli a1, a1, 2 -; RV32M-NEXT: mv a0, sp -; RV32M-NEXT: or a1, a0, a1 +; RV32M-NEXT: or a1, a2, a1 +; RV32M-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV32M-NEXT: vle32.v v8, (a0) ; RV32M-NEXT: vadd.vv v8, v8, v8 -; RV32M-NEXT: vse32.v v8, (a0) +; RV32M-NEXT: vse32.v v8, (a2) ; RV32M-NEXT: lw a0, 0(a1) ; RV32M-NEXT: addi sp, s0, -256 ; RV32M-NEXT: lw ra, 252(sp) # 4-byte Folded Reload @@ -676,11 +676,11 @@ define i32 @extractelt_v32i32_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV64NOM-NEXT: andi a0, a1, 31 ; RV64NOM-NEXT: li a1, 4 ; RV64NOM-NEXT: call __muldi3 -; RV64NOM-NEXT: li a1, 32 -; RV64NOM-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; RV64NOM-NEXT: vle32.v v8, (s2) ; RV64NOM-NEXT: mv a1, sp +; RV64NOM-NEXT: li a2, 32 ; RV64NOM-NEXT: add a0, a1, a0 +; RV64NOM-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; RV64NOM-NEXT: vle32.v v8, (s2) ; RV64NOM-NEXT: vadd.vv v8, v8, v8 ; RV64NOM-NEXT: vse32.v v8, (a1) ; RV64NOM-NEXT: lw a0, 0(a0) @@ -699,14 +699,14 @@ define i32 @extractelt_v32i32_idx(ptr %x, i32 zeroext %idx) nounwind { ; RV64M-NEXT: addi s0, sp, 256 ; RV64M-NEXT: andi sp, sp, -128 ; RV64M-NEXT: andi a1, a1, 31 -; RV64M-NEXT: li a2, 32 -; RV64M-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV64M-NEXT: vle32.v v8, (a0) +; RV64M-NEXT: mv a2, sp +; RV64M-NEXT: li a3, 32 ; RV64M-NEXT: slli a1, a1, 2 -; RV64M-NEXT: mv a0, sp -; RV64M-NEXT: or a1, a0, a1 +; RV64M-NEXT: or a1, a2, a1 +; RV64M-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; RV64M-NEXT: vle32.v v8, (a0) ; RV64M-NEXT: vadd.vv v8, v8, v8 -; RV64M-NEXT: vse32.v v8, (a0) +; RV64M-NEXT: vse32.v v8, (a2) ; RV64M-NEXT: lw a0, 0(a1) ; RV64M-NEXT: addi sp, s0, -256 ; RV64M-NEXT: ld ra, 248(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll index ab2d00b9b9137..c328d5fbe6b0a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fceil-constrained-sdnode.ll @@ -10,11 +10,11 @@ define <1 x half> @ceil_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -33,11 +33,11 @@ define <2 x half> @ceil_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv 
v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -56,11 +56,11 @@ define <4 x half> @ceil_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -79,11 +79,11 @@ define <8 x half> @ceil_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -102,11 +102,11 @@ define <16 x half> @ceil_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -128,9 +128,9 @@ define <32 x half> @ceil_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -151,9 +151,9 @@ define <1 x float> @ceil_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -174,9 +174,9 @@ define <2 x float> @ceil_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -197,9 +197,9 @@ define <4 x float> @ceil_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, 
v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -220,9 +220,9 @@ define <8 x float> @ceil_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -243,9 +243,9 @@ define <16 x float> @ceil_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -264,11 +264,11 @@ define <1 x double> @ceil_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -287,11 +287,11 @@ define <2 x double> @ceil_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -310,11 +310,11 @@ define <4 x double> @ceil_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -333,11 +333,11 @@ define <8 x double> @ceil_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 3 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll index c6ce7c1bbe8b4..ebb75357cdfe7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ffloor-constrained-sdnode.ll @@ 
-10,11 +10,11 @@ define <1 x half> @floor_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -33,11 +33,11 @@ define <2 x half> @floor_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -56,11 +56,11 @@ define <4 x half> @floor_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -79,11 +79,11 @@ define <8 x half> @floor_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -102,11 +102,11 @@ define <16 x half> @floor_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -128,9 +128,9 @@ define <32 x half> @floor_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -151,9 +151,9 @@ define <1 x float> @floor_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v 
v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -174,9 +174,9 @@ define <2 x float> @floor_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -197,9 +197,9 @@ define <4 x float> @floor_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -220,9 +220,9 @@ define <8 x float> @floor_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -243,9 +243,9 @@ define <16 x float> @floor_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -264,11 +264,11 @@ define <1 x double> @floor_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -287,11 +287,11 @@ define <2 x double> @floor_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -310,11 +310,11 @@ define <4 x double> @floor_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -333,11 +333,11 @@ define <8 x double> @floor_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; 
CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll index d500469003aea..6536021da0313 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll @@ -13,13 +13,13 @@ declare <2 x half> @llvm.vp.floor.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_floor_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI0_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -35,12 +35,12 @@ define <2 x half> @vp_floor_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -59,12 +59,12 @@ define <2 x half> @vp_floor_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) define <2 x half> @vp_floor_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI1_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -77,11 +77,11 @@ define <2 x half> @vp_floor_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -99,13 +99,13 @@ declare <4 x half> @llvm.vp.floor.v4f16(<4 x half>, <4 x i1>, i32) define <4 x half> @vp_floor_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, 
ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI2_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -121,12 +121,12 @@ define <4 x half> @vp_floor_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -145,12 +145,12 @@ define <4 x half> @vp_floor_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) define <4 x half> @vp_floor_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI3_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,11 +163,11 @@ define <4 x half> @vp_floor_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -185,13 +185,13 @@ declare <8 x half> @llvm.vp.floor.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -207,12 +207,12 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; 
ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -231,12 +231,12 @@ define <8 x half> @vp_floor_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) define <8 x half> @vp_floor_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -249,11 +249,11 @@ define <8 x half> @vp_floor_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -273,12 +273,12 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) -; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -295,12 +295,12 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -319,12 +319,12 @@ define <16 x half> @vp_floor_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % define <16 x half> @vp_floor_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_v16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -337,11 +337,11 @@ define <16 x half> @vp_floor_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 
-; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -363,9 +363,9 @@ define <2 x float> @vp_floor_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -384,8 +384,8 @@ define <2 x float> @vp_floor_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -405,9 +405,9 @@ define <4 x float> @vp_floor_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -426,8 +426,8 @@ define <4 x float> @vp_floor_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -448,9 +448,9 @@ define <8 x float> @vp_floor_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -470,8 +470,8 @@ define <8 x float> @vp_floor_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -492,9 +492,9 @@ define <16 x float> @vp_floor_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -514,8 +514,8 @@ define <16 x float> @vp_floor_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -531,13 +531,13 @@ declare <2 x double> @llvm.vp.floor.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_floor_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI16_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -552,12 +552,12 @@ define <2 x double> @vp_floor_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext % define <2 x double> @vp_floor_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -575,12 +575,12 @@ define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -596,12 +596,12 @@ define <4 x double> @vp_floor_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext % define <4 x double> @vp_floor_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -619,12 +619,12 @@ define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -640,12 +640,12 @@ define <8 x double> @vp_floor_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext % define <8 x double> @vp_floor_v8f64_unmasked(<8 x double> %va, i32 zeroext 
%evl) { ; CHECK-LABEL: vp_floor_v8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI21_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -663,12 +663,12 @@ define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -684,12 +684,12 @@ define <15 x double> @vp_floor_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroe define <15 x double> @vp_floor_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v15f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI23_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -707,12 +707,12 @@ define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -728,12 +728,12 @@ define <16 x double> @vp_floor_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroe define <16 x double> @vp_floor_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_v16f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI25_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -762,8 +762,8 @@ define <32 x double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -778,33 +778,33 @@ define <32 x 
double> @vp_floor_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll index 4f11e6c3c386a..dc5e2e213f781 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll @@ -59,16 +59,14 @@ define <2 x half> @vfmax_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFHMIN-LABEL: vfmax_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -127,16 +125,14 @@ define <4 x half> @vfmax_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFHMIN-LABEL: vfmax_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -197,15 +193,13 @@ define <8 x half> @vfmax_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFHMIN-LABEL: vfmax_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 @@ -269,15 +263,13 @@ define <16 x half> @vfmax_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFHMIN-LABEL: vfmax_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 @@ -587,7 +579,7 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 @@ -601,29 +593,29 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vslidedown.vi v7, v0, 2 +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; 
CHECK-NEXT: bltu a2, a1, .LBB24_2 +; CHECK-NEXT: bltu a2, a3, .LBB24_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB24_2: ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: mul a0, a0, a3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: vmv8r.v v8, v16 @@ -680,10 +672,10 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v16, v8, v16, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll index e17ad303eddb8..eeb9ba155764c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum.ll @@ -24,16 +24,14 @@ define <2 x half> @vfmax_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFHMIN-LABEL: vfmax_v2f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -57,16 +55,14 @@ define <4 x half> @vfmax_v4f16_vv(<4 x half> %a, <4 x half> %b) { ; ZVFHMIN-LABEL: vfmax_v4f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, 
ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -90,15 +86,13 @@ define <8 x half> @vfmax_v8f16_vv(<8 x half> %a, <8 x half> %b) { ; ZVFHMIN-LABEL: vfmax_v8f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 @@ -123,15 +117,13 @@ define <16 x half> @vfmax_v16f16_vv(<16 x half> %a, <16 x half> %b) { ; ZVFHMIN-LABEL: vfmax_v16f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 @@ -295,8 +287,8 @@ define <2 x half> @vfmax_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmax_v2f16_vv_nnana: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vfadd.vv v8, v8, v8 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 ; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0 @@ -332,8 +324,8 @@ define <2 x half> @vfmax_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmax_v2f16_vv_nnanb: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vmfeq.vv v0, v8, v8 ; ZVFH-NEXT: vfadd.vv v9, v9, v9 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 ; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll index 2e2103ad5e06d..546aa751c9c73 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum-vp.ll @@ -59,16 +59,14 @@ define <2 x half> @vfmin_vv_v2f16_unmasked(<2 x half> %va, <2 x half> %vb, i32 z ; ZVFHMIN-LABEL: vfmin_vv_v2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli 
zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -127,16 +125,14 @@ define <4 x half> @vfmin_vv_v4f16_unmasked(<4 x half> %va, <4 x half> %vb, i32 z ; ZVFHMIN-LABEL: vfmin_vv_v4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -197,15 +193,13 @@ define <8 x half> @vfmin_vv_v8f16_unmasked(<8 x half> %va, <8 x half> %vb, i32 z ; ZVFHMIN-LABEL: vfmin_vv_v8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 @@ -269,15 +263,13 @@ define <16 x half> @vfmin_vv_v16f16_unmasked(<16 x half> %va, <16 x half> %vb, i ; ZVFHMIN-LABEL: vfmin_vv_v16f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 @@ -587,7 +579,7 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: slli a1, a1, 5 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 
0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v25, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 @@ -601,29 +593,29 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vslidedown.vi v7, v0, 2 +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: bltu a2, a1, .LBB24_2 +; CHECK-NEXT: bltu a2, a3, .LBB24_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB24_2: ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: mul a0, a0, a3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: vmv8r.v v8, v16 @@ -680,10 +672,10 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vmfeq.vv v25, v8, v8, v0.t ; CHECK-NEXT: vmv1r.v v0, v25 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v16, v8, v16, v0.t +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v24, v8, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: mul a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll index 1362055c4dabf..196915bf141d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fminimum.ll @@ -24,16 +24,14 @@ define <2 x half> @vfmin_v2f16_vv(<2 x half> %a, <2 x half> %b) { ; ZVFHMIN-LABEL: vfmin_v2f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; 
ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -57,16 +55,14 @@ define <4 x half> @vfmin_v4f16_vv(<4 x half> %a, <4 x half> %b) { ; ZVFHMIN-LABEL: vfmin_v4f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -90,15 +86,13 @@ define <8 x half> @vfmin_v8f16_vv(<8 x half> %a, <8 x half> %b) { ; ZVFHMIN-LABEL: vfmin_v8f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 @@ -123,15 +117,13 @@ define <16 x half> @vfmin_v16f16_vv(<16 x half> %a, <16 x half> %b) { ; ZVFHMIN-LABEL: vfmin_v16f16_vv: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 @@ -295,8 +287,8 @@ define <2 x half> @vfmin_v2f16_vv_nnana(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmin_v2f16_vv_nnana: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vfadd.vv v8, v8, v8 +; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v10, v9, v8, v0 ; ZVFH-NEXT: vmfeq.vv v0, v8, v8 ; ZVFH-NEXT: vmerge.vvm v8, v8, v9, v0 @@ -332,8 +324,8 @@ define <2 x half> @vfmin_v2f16_vv_nnanb(<2 x half> %a, <2 x half> %b) { ; ZVFH-LABEL: vfmin_v2f16_vv_nnanb: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, 
e16, mf4, ta, ma -; ZVFH-NEXT: vmfeq.vv v0, v8, v8 ; ZVFH-NEXT: vfadd.vv v9, v9, v9 +; ZVFH-NEXT: vmfeq.vv v0, v8, v8 ; ZVFH-NEXT: vmerge.vvm v10, v8, v9, v0 ; ZVFH-NEXT: vmfeq.vv v0, v9, v9 ; ZVFH-NEXT: vmerge.vvm v8, v9, v8, v0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll index 3a7ded1537ef6..f192a053ac888 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fnearbyint-constrained-sdnode.ll @@ -12,17 +12,17 @@ define <2 x half> @nearbyint_v2f16(<2 x half> %v) strictfp { ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <2 x half> @llvm.experimental.constrained.nearbyint.v2f16(<2 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x half> %r @@ -36,17 +36,17 @@ define <4 x half> @nearbyint_v4f16(<4 x half> %v) strictfp { ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <4 x half> @llvm.experimental.constrained.nearbyint.v4f16(<4 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x half> %r @@ -60,17 +60,17 @@ define <8 x half> @nearbyint_v8f16(<8 x half> %v) strictfp { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <8 x half> @llvm.experimental.constrained.nearbyint.v8f16(<8 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x half> %r @@ -84,17 +84,17 @@ define <16 x half> @nearbyint_v16f16(<16 x half> %v) strictfp { ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; 
CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <16 x half> @llvm.experimental.constrained.nearbyint.v16f16(<16 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <16 x half> %r @@ -111,15 +111,15 @@ define <32 x half> @nearbyint_v32f16(<32 x half> %v) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <32 x half> @llvm.experimental.constrained.nearbyint.v32f16(<32 x half> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <32 x half> %r @@ -135,15 +135,15 @@ define <2 x float> @nearbyint_v2f32(<2 x float> %v) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <2 x float> @llvm.experimental.constrained.nearbyint.v2f32(<2 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x float> %r @@ -159,15 +159,15 @@ define <4 x float> @nearbyint_v4f32(<4 x float> %v) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <4 x float> @llvm.experimental.constrained.nearbyint.v4f32(<4 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x float> %r @@ -183,15 +183,15 @@ define <8 x float> @nearbyint_v8f32(<8 x float> %v) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <8 x float> @llvm.experimental.constrained.nearbyint.v8f32(<8 x 
float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x float> %r @@ -207,15 +207,15 @@ define <16 x float> @nearbyint_v16f32(<16 x float> %v) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <16 x float> @llvm.experimental.constrained.nearbyint.v16f32(<16 x float> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <16 x float> %r @@ -229,17 +229,17 @@ define <2 x double> @nearbyint_v2f64(<2 x double> %v) strictfp { ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI9_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <2 x double> %r @@ -253,17 +253,17 @@ define <4 x double> @nearbyint_v4f64(<4 x double> %v) strictfp { ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI10_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI10_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI10_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <4 x double> %r @@ -277,17 +277,17 @@ define <8 x double> @nearbyint_v8f64(<8 x double> %v) strictfp { ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call <8 x double> @llvm.experimental.constrained.nearbyint.v8f64(<8 x double> %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret <8 x double> 
%r diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index e82891f90d85e..4c0186e7d219c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -46,9 +46,11 @@ define <4 x float> @hang_when_merging_stores_after_legalization(<8 x float> %x, ; CHECK-NEXT: vmadd.vx v14, a0, v12 ; CHECK-NEXT: li a0, 129 ; CHECK-NEXT: vmv.s.x v15, a0 -; CHECK-NEXT: vmv.v.i v0, 12 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vcompress.vm v12, v8, v15 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu ; CHECK-NEXT: vrgatherei16.vv v12, v10, v14, v0.t ; CHECK-NEXT: vmv1r.v v8, v12 ; CHECK-NEXT: ret @@ -1749,13 +1751,13 @@ define <8 x float> @buildvec_v8f32_zvl256(float %e0, float %e1, float %e2, float ; CHECK-NEXT: vsetivli zero, 8, e32, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v8, fa0 ; CHECK-NEXT: vfmv.v.f v9, fa4 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 ; CHECK-NEXT: vfslide1down.vf v9, v9, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 ; CHECK-NEXT: vfslide1down.vf v9, v9, fa6 ; CHECK-NEXT: vfslide1down.vf v10, v8, fa3 ; CHECK-NEXT: vfslide1down.vf v8, v9, fa7 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v10, 4, v0.t ; CHECK-NEXT: ret %v0 = insertelement <8 x float> poison, float %e0, i64 0 @@ -1800,13 +1802,13 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d ; CHECK-NEXT: vsetivli zero, 8, e64, m1, ta, mu ; CHECK-NEXT: vfmv.v.f v8, fa0 ; CHECK-NEXT: vfmv.v.f v9, fa4 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 ; CHECK-NEXT: vfslide1down.vf v9, v9, fa5 ; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 ; CHECK-NEXT: vfslide1down.vf v9, v9, fa6 ; CHECK-NEXT: vfslide1down.vf v10, v8, fa3 ; CHECK-NEXT: vfslide1down.vf v8, v9, fa7 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v10, 4, v0.t ; CHECK-NEXT: ret %v0 = insertelement <8 x double> poison, double %e0, i64 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index ac78a252cf9cd..1d02918ac8a9a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -41,7 +41,6 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; V128-NEXT: csrr a0, vlenb ; V128-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; V128-NEXT: vid.v v10 -; V128-NEXT: vmv.v.i v0, 10 ; V128-NEXT: srli a0, a0, 3 ; V128-NEXT: vsrl.vi v10, v10, 1 ; V128-NEXT: vslidedown.vx v11, v10, a0 @@ -50,6 +49,7 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; V128-NEXT: vrgatherei16.vv v12, v9, v10 ; V128-NEXT: vrgatherei16.vv v15, v8, v11 ; V128-NEXT: vrgatherei16.vv v14, v8, v10 +; V128-NEXT: vmv.v.i v0, 10 ; V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; V128-NEXT: vmerge.vvm v8, v14, v12, v0 ; V128-NEXT: ret @@ -59,9 +59,9 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; RV32-V512-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; RV32-V512-NEXT: vid.v v10 ; RV32-V512-NEXT: vsrl.vi v11, v10, 1 -; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 +; RV32-V512-NEXT: vmv.v.i v0, 10 ; 
RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t ; RV32-V512-NEXT: vmv.v.v v8, v10 ; RV32-V512-NEXT: ret @@ -71,8 +71,8 @@ define <4 x double> @interleave_v2f64(<2 x double> %x, <2 x double> %y) { ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-V512-NEXT: vid.v v10 ; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v8, v11 +; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-V512-NEXT: vmv.v.v v8, v10 ; RV64-V512-NEXT: ret @@ -258,8 +258,8 @@ define <64 x float> @interleave_v32f32(<32 x float> %x, <32 x float> %y) { ; V128-NEXT: vzext.vf2 v8, v24 ; V128-NEXT: addi a1, a1, -1366 ; V128-NEXT: vzext.vf2 v24, v0 -; V128-NEXT: vmv.s.x v0, a1 ; V128-NEXT: vsll.vx v8, v8, a0 +; V128-NEXT: vmv.s.x v0, a1 ; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v24, v8, v0 ; V128-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll index c14eae0b1de61..92374177d93e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-setcc.ll @@ -17,10 +17,10 @@ define void @fcmp_oeq_vv_v8f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fcmp_oeq_vv_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10 ; ZVFHMIN-NEXT: vsm.v v8, (a2) @@ -45,10 +45,10 @@ define void @fcmp_oeq_vv_v8f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fcmp_oeq_vv_v8f16_nonans: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vmfeq.vv v8, v12, v10 ; ZVFHMIN-NEXT: vsm.v v8, (a2) @@ -173,10 +173,10 @@ define void @fcmp_olt_vv_v16f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v10, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v10, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 ; ZVFHMIN-NEXT: vsm.v v8, (a2) @@ -201,10 +201,10 @@ define void @fcmp_olt_vv_v16f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fcmp_olt_vv_v16f16_nonans: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v10, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v10, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, 
ta, ma ; ZVFHMIN-NEXT: vmflt.vv v8, v16, v12 ; ZVFHMIN-NEXT: vsm.v v8, (a2) @@ -345,10 +345,10 @@ define void @fcmp_ule_vv_v32f16_nonans(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: li a3, 32 ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v12, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v12 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v12, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vmfle.vv v8, v24, v16 ; ZVFHMIN-NEXT: vsm.v v8, (a2) @@ -535,11 +535,11 @@ define void @fcmp_ord_vv_v4f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-LABEL: fcmp_ord_vv_v4f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFH-NEXT: vle16.v v8, (a1) -; ZVFH-NEXT: vle16.v v9, (a0) -; ZVFH-NEXT: vmfeq.vv v8, v8, v8 +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vmfeq.vv v9, v9, v9 -; ZVFH-NEXT: vmand.mm v0, v9, v8 +; ZVFH-NEXT: vmfeq.vv v8, v8, v8 +; ZVFH-NEXT: vmand.mm v0, v8, v9 ; ZVFH-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; ZVFH-NEXT: vmv.v.i v8, 0 ; ZVFH-NEXT: vmerge.vim v8, v8, 1, v0 @@ -555,14 +555,14 @@ define void @fcmp_ord_vv_v4f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fcmp_ord_vv_v4f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v9, v10, v10 -; ZVFHMIN-NEXT: vmfeq.vv v8, v8, v8 -; ZVFHMIN-NEXT: vmand.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfeq.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmfeq.vv v9, v9, v9 +; ZVFHMIN-NEXT: vmand.mm v0, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; ZVFHMIN-NEXT: vmv.v.i v8, 0 ; ZVFHMIN-NEXT: vmerge.vim v8, v8, 1, v0 @@ -585,11 +585,11 @@ define void @fcmp_uno_vv_v4f16(ptr %x, ptr %y, ptr %z) { ; ZVFH-LABEL: fcmp_uno_vv_v4f16: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFH-NEXT: vle16.v v8, (a1) -; ZVFH-NEXT: vle16.v v9, (a0) -; ZVFH-NEXT: vmfne.vv v8, v8, v8 +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle16.v v9, (a1) ; ZVFH-NEXT: vmfne.vv v9, v9, v9 -; ZVFH-NEXT: vmor.mm v0, v9, v8 +; ZVFH-NEXT: vmfne.vv v8, v8, v8 +; ZVFH-NEXT: vmor.mm v0, v8, v9 ; ZVFH-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; ZVFH-NEXT: vmv.v.i v8, 0 ; ZVFH-NEXT: vmerge.vim v8, v8, 1, v0 @@ -605,14 +605,14 @@ define void @fcmp_uno_vv_v4f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fcmp_uno_vv_v4f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmfne.vv v9, v10, v10 -; ZVFHMIN-NEXT: vmfne.vv v8, v8, v8 -; ZVFHMIN-NEXT: vmor.mm v0, v8, v9 +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10 +; ZVFHMIN-NEXT: vmfne.vv v9, v9, v9 +; ZVFHMIN-NEXT: vmor.mm v0, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e8, mf8, ta, ma ; ZVFHMIN-NEXT: vmv.v.i v8, 0 ; 
ZVFHMIN-NEXT: vmerge.vim v8, v8, 1, v0 @@ -692,12 +692,13 @@ define void @fcmp_oeq_vf_v8f16_nonans(ptr %x, half %y, ptr %z) { define void @fcmp_une_vf_v4f32(ptr %x, float %y, ptr %z) { ; CHECK-LABEL: fcmp_une_vf_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma @@ -717,12 +718,13 @@ define void @fcmp_une_vf_v4f32(ptr %x, float %y, ptr %z) { define void @fcmp_une_vf_v4f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK-LABEL: fcmp_une_vf_v4f32_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma @@ -742,12 +744,13 @@ define void @fcmp_une_vf_v4f32_nonans(ptr %x, float %y, ptr %z) { define void @fcmp_ogt_vf_v2f64(ptr %x, double %y, ptr %z) { ; CHECK-LABEL: fcmp_ogt_vf_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfgt.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -767,12 +770,13 @@ define void @fcmp_ogt_vf_v2f64(ptr %x, double %y, ptr %z) { define void @fcmp_ogt_vf_v2f64_nonans(ptr %x, double %y, ptr %z) { ; CHECK-LABEL: fcmp_ogt_vf_v2f64_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmfgt.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -1333,12 +1337,13 @@ define void @fcmp_oeq_fv_v8f16_nonans(ptr %x, half %y, ptr %z) { define void @fcmp_une_fv_v4f32(ptr %x, float %y, ptr %z) { ; CHECK-LABEL: fcmp_une_fv_v4f32: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, 
v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma @@ -1358,12 +1363,13 @@ define void @fcmp_une_fv_v4f32(ptr %x, float %y, ptr %z) { define void @fcmp_une_fv_v4f32_nonans(ptr %x, float %y, ptr %z) { ; CHECK-LABEL: fcmp_une_fv_v4f32_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vmfne.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma @@ -1383,12 +1389,13 @@ define void @fcmp_une_fv_v4f32_nonans(ptr %x, float %y, ptr %z) { define void @fcmp_ogt_fv_v2f64(ptr %x, double %y, ptr %z) { ; CHECK-LABEL: fcmp_ogt_fv_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmflt.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma @@ -1408,12 +1415,13 @@ define void @fcmp_ogt_fv_v2f64(ptr %x, double %y, ptr %z) { define void @fcmp_ogt_fv_v2f64_nonans(ptr %x, double %y, ptr %z) { ; CHECK-LABEL: fcmp_ogt_fv_v2f64_nonans: ; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vmv.v.i v9, 0 +; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vmflt.vf v0, v8, fa0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v8, 0 -; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll index 41d8abb9b73eb..8e288fec53778 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -41,10 +41,10 @@ define <8 x float> @shuffle_v8f32(<8 x float> %x, <8 x float> %y) { define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) { ; CHECK-LABEL: shuffle_fv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 9 +; CHECK-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0 ; CHECK-NEXT: ret @@ -55,10 +55,10 @@ define <4 x double> @shuffle_fv_v4f64(<4 x double> %x) { define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) { ; CHECK-LABEL: shuffle_vf_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 6 +; CHECK-NEXT: lui a0, %hi(.LCPI4_0) +; 
CHECK-NEXT: fld fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfmerge.vfm v8, v8, fa5, v0 ; CHECK-NEXT: ret @@ -105,11 +105,12 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI7_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI7_0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vle16.v v14, (a0) -; CHECK-NEXT: vmv.v.i v0, 8 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vrgatherei16.vv v12, v8, v14 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 8 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vrgather.vi v12, v10, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -120,14 +121,16 @@ define <4 x double> @vrgather_shuffle_vv_v4f64(<4 x double> %x, <4 x double> %y) define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { ; CHECK-LABEL: vrgather_shuffle_xv_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI8_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI8_0)(a0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: lui a0, %hi(.LCPI8_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI8_0)(a0) ; CHECK-NEXT: vrsub.vi v12, v10, 4 -; CHECK-NEXT: vmv.v.i v0, 12 -; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfmv.v.f v10, fa5 +; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.i v0, 12 +; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vrgatherei16.vv v10, v8, v12, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -138,16 +141,16 @@ define <4 x double> @vrgather_shuffle_xv_v4f64(<4 x double> %x) { define <4 x double> @vrgather_shuffle_vx_v4f64(<4 x double> %x) { ; CHECK-LABEL: vrgather_shuffle_vx_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI9_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v10, 9 +; CHECK-NEXT: lui a0, %hi(.LCPI9_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI9_0)(a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vcompress.vm v12, v8, v10 +; CHECK-NEXT: vfmv.v.f v8, fa5 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.i v0, 3 ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-NEXT: vfmv.v.f v8, fa5 ; CHECK-NEXT: vmerge.vvm v8, v8, v12, v0 ; CHECK-NEXT: ret %s = shufflevector <4 x double> %x, <4 x double> , <4 x i32> @@ -331,8 +334,8 @@ define <4 x bfloat> @vrgather_shuffle_vv_v4bf16(<4 x bfloat> %x, <4 x bfloat> %y ; CHECK-NEXT: addi a0, a0, %lo(.LCPI25_0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v11, (a0) -; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -375,8 +378,8 @@ define <4 x half> @vrgather_shuffle_vv_v4f16(<4 x half> %x, <4 x half> %y) { ; CHECK-NEXT: addi a0, a0, %lo(.LCPI28_0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v11, (a0) -; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -399,18 +402,18 @@ define <4 x half> @vrgather_shuffle_vx_v4f16_load(ptr %p) { define <16 x float> @shuffle_disjoint_lanes(<16 x float> %v, <16 x float> %w) { ; CHECK-LABEL: 
shuffle_disjoint_lanes: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI30_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0) -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: lui a0, 11 ; CHECK-NEXT: addi a0, a0, -1366 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI30_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0) ; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vsext.vf2 v18, v16 +; CHECK-NEXT: vsext.vf2 v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v8, v12, v18 +; CHECK-NEXT: vrgatherei16.vv v8, v12, v16 ; CHECK-NEXT: ret %out = shufflevector <16 x float> %v, <16 x float> %w, <16 x i32> ret <16 x float> %out @@ -437,12 +440,12 @@ define <16 x float> @shuffle_disjoint_lanes_one_broadcast(<16 x float> %v, <16 x ; CHECK-NEXT: lui a0, %hi(.LCPI32_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0) ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vle16.v v20, (a0) +; CHECK-NEXT: vrgather.vi v16, v8, 7 +; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 15 ; CHECK-NEXT: addi a0, a0, 240 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vi v16, v8, 7 -; CHECK-NEXT: vrgatherei16.vv v16, v12, v20, v0.t +; CHECK-NEXT: vrgatherei16.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %out = shufflevector <16 x float> %v, <16 x float> %w, <16 x i32> @@ -452,14 +455,14 @@ define <16 x float> @shuffle_disjoint_lanes_one_broadcast(<16 x float> %v, <16 x define <16 x float> @shuffle_disjoint_lanes_one_splat(float %v, <16 x float> %w) { ; CHECK-LABEL: shuffle_disjoint_lanes_one_splat: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu +; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: lui a0, %hi(.LCPI33_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI33_0) -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: lui a0, 15 ; CHECK-NEXT: addi a0, a0, 240 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vfmv.v.f v12, fa0 ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll index 58b0a17cdccd6..fed76227a2b69 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-vrgather.ll @@ -53,8 +53,8 @@ define void @gather_const_v2f64(ptr %x) { define void @gather_const_v64f16(ptr %x) { ; CHECK-LABEL: gather_const_v64f16: ; CHECK: # %bb.0: -; CHECK-NEXT: flh fa5, 94(a0) ; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: flh fa5, 94(a0) ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v8, fa5 ; CHECK-NEXT: vse16.v v8, (a0) @@ -70,8 +70,8 @@ define void @gather_const_v64f16(ptr %x) { define void @gather_const_v32f32(ptr %x) { ; CHECK-LABEL: gather_const_v32f32: ; CHECK: # %bb.0: -; CHECK-NEXT: flw fa5, 68(a0) ; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: flw fa5, 68(a0) ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vfmv.v.f v8, fa5 ; CHECK-NEXT: vse32.v v8, (a0) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll index 585a331e55094..86c727199bbae 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp.ll @@ -9,10 +9,10 @@ define void @fadd_v8bf16(ptr 
%x, ptr %y) { ; CHECK-LABEL: fadd_v8bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v12, v10 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -30,10 +30,10 @@ define void @fadd_v6bf16(ptr %x, ptr %y) { ; CHECK-LABEL: fadd_v6bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfadd.vv v8, v12, v10 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -60,10 +60,10 @@ define void @fadd_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: fadd_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -90,10 +90,10 @@ define void @fadd_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: fadd_v6f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -143,10 +143,10 @@ define void @fsub_v8bf16(ptr %x, ptr %y) { ; CHECK-LABEL: fsub_v8bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v12, v10 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -164,10 +164,10 @@ define void @fsub_v6bf16(ptr %x, ptr %y) { ; CHECK-LABEL: fsub_v6bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfsub.vv v8, v12, v10 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -194,10 +194,10 @@ define void @fsub_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: fsub_v8f16: 
; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -224,10 +224,10 @@ define void @fsub_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: fsub_v6f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -277,10 +277,10 @@ define void @fmul_v8bf16(ptr %x, ptr %y) { ; CHECK-LABEL: fmul_v8bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v12, v10 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -298,10 +298,10 @@ define void @fmul_v6bf16(ptr %x, ptr %y) { ; CHECK-LABEL: fmul_v6bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v12, v10 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -328,10 +328,10 @@ define void @fmul_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: fmul_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -358,10 +358,10 @@ define void @fmul_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: fmul_v6f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -411,10 +411,10 @@ define void @fdiv_v8bf16(ptr %x, ptr %y) { ; CHECK-LABEL: fdiv_v8bf16: ; CHECK: # 
%bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v12, v10 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -432,10 +432,10 @@ define void @fdiv_v6bf16(ptr %x, ptr %y) { ; CHECK-LABEL: fdiv_v6bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v12, v10 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -462,10 +462,10 @@ define void @fdiv_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: fdiv_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -492,10 +492,10 @@ define void @fdiv_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: fdiv_v6f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v12, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -757,13 +757,13 @@ define void @copysign_v8bf16(ptr %x, ptr %y) { ; CHECK-LABEL: copysign_v8bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: lui a1, 8 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x bfloat>, ptr %x @@ -777,13 +777,13 @@ define void @copysign_v6bf16(ptr %x, ptr %y) { ; CHECK-LABEL: copysign_v6bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: lui a1, 8 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: vand.vx v9, v9, a1 -; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: vand.vx v8, v8, a1 +; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x bfloat>, ptr %x @@ -806,13 +806,13 @@ define void 
@copysign_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: copysign_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vand.vx v8, v8, a1 -; ZVFHMIN-NEXT: addi a1, a1, -1 ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 -; ZVFHMIN-NEXT: vor.vv v8, v9, v8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x @@ -835,13 +835,13 @@ define void @copysign_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: copysign_v6f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vand.vx v8, v8, a1 -; ZVFHMIN-NEXT: addi a1, a1, -1 ; ZVFHMIN-NEXT: vand.vx v9, v9, a1 -; ZVFHMIN-NEXT: vor.vv v8, v9, v8 +; ZVFHMIN-NEXT: addi a1, a1, -1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x @@ -1023,14 +1023,14 @@ define void @copysign_neg_v8bf16(ptr %x, ptr %y) { ; CHECK-LABEL: copysign_neg_v8bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: addi a2, a1, -1 -; CHECK-NEXT: vxor.vx v8, v8, a1 -; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vxor.vx v9, v9, a1 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x bfloat>, ptr %x @@ -1045,14 +1045,14 @@ define void @copysign_neg_v6bf16(ptr %x, ptr %y) { ; CHECK-LABEL: copysign_neg_v6bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: addi a2, a1, -1 -; CHECK-NEXT: vxor.vx v8, v8, a1 -; CHECK-NEXT: vand.vx v9, v9, a2 -; CHECK-NEXT: vand.vx v8, v8, a1 -; CHECK-NEXT: vor.vv v8, v9, v8 +; CHECK-NEXT: vxor.vx v9, v9, a1 +; CHECK-NEXT: vand.vx v8, v8, a2 +; CHECK-NEXT: vand.vx v9, v9, a1 +; CHECK-NEXT: vor.vv v8, v8, v9 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x bfloat>, ptr %x @@ -1076,14 +1076,14 @@ define void @copysign_neg_v8f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: copysign_neg_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: addi a2, a1, -1 -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vand.vx v9, v9, a2 -; ZVFHMIN-NEXT: vand.vx v8, v8, a1 -; ZVFHMIN-NEXT: vor.vv v8, v9, v8 +; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a2 +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x @@ -1107,14 +1107,14 @@ define void @copysign_neg_v6f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: copysign_neg_v6f16: 
; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: addi a2, a1, -1 -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vand.vx v9, v9, a2 -; ZVFHMIN-NEXT: vand.vx v8, v8, a1 -; ZVFHMIN-NEXT: vor.vv v8, v9, v8 +; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 +; ZVFHMIN-NEXT: vand.vx v8, v8, a2 +; ZVFHMIN-NEXT: vand.vx v9, v9, a1 +; ZVFHMIN-NEXT: vor.vv v8, v8, v9 ; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x @@ -1211,10 +1211,10 @@ define void @copysign_neg_trunc_v4f16_v4f32(ptr %x, ptr %y) { ; ZVFH-LABEL: copysign_neg_trunc_v4f16_v4f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; ZVFH-NEXT: vle32.v v8, (a1) -; ZVFH-NEXT: vle16.v v9, (a0) -; ZVFH-NEXT: vfncvt.f.f.w v10, v8 -; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10 +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle32.v v9, (a1) +; ZVFH-NEXT: vfncvt.f.f.w v10, v9 +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v10 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; @@ -1245,10 +1245,10 @@ define void @copysign_neg_trunc_v3f16_v3f32(ptr %x, ptr %y) { ; ZVFH-LABEL: copysign_neg_trunc_v3f16_v3f32: ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetivli zero, 3, e16, mf2, ta, ma -; ZVFH-NEXT: vle32.v v8, (a1) -; ZVFH-NEXT: vle16.v v9, (a0) -; ZVFH-NEXT: vfncvt.f.f.w v10, v8 -; ZVFH-NEXT: vfsgnjn.vv v8, v9, v10 +; ZVFH-NEXT: vle16.v v8, (a0) +; ZVFH-NEXT: vle32.v v9, (a1) +; ZVFH-NEXT: vfncvt.f.f.w v10, v9 +; ZVFH-NEXT: vfsgnjn.vv v8, v8, v10 ; ZVFH-NEXT: vse16.v v8, (a0) ; ZVFH-NEXT: ret ; @@ -1279,11 +1279,11 @@ define void @copysign_neg_ext_v2f64_v2f32(ptr %x, ptr %y) { ; CHECK-LABEL: copysign_neg_ext_v2f64_v2f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v8, (a1) -; CHECK-NEXT: vle64.v v9, (a0) -; CHECK-NEXT: vfwcvt.f.f.v v10, v8 +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle32.v v9, (a1) +; CHECK-NEXT: vfwcvt.f.f.v v10, v9 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma -; CHECK-NEXT: vfsgnjn.vv v8, v9, v10 +; CHECK-NEXT: vfsgnjn.vv v8, v8, v10 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret %a = load <2 x double>, ptr %x @@ -1417,17 +1417,17 @@ define void @fma_v8bf16(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: fma_v8bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a2) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vle16.v v10, (a1) -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v14, v12 +; CHECK-NEXT: vfmadd.vv v14, v10, v12 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 -; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14 +; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x bfloat>, ptr %x %b = load <8 x bfloat>, ptr %y @@ -1441,17 +1441,17 @@ define void @fma_v6bf16(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: fma_v6bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a2) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vle16.v v10, (a1) -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vle16.v 
v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v14, v12 +; CHECK-NEXT: vfmadd.vv v14, v10, v12 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 -; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v14 +; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x bfloat>, ptr %x %b = load <6 x bfloat>, ptr %y @@ -1475,17 +1475,17 @@ define void @fma_v8f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fma_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a2) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vle16.v v10, (a1) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vle16.v v10, (a2) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12 +; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMIN-NEXT: vse16.v v10, (a0) +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 +; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y @@ -1509,17 +1509,17 @@ define void @fma_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fma_v6f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a2) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vle16.v v10, (a1) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vle16.v v10, (a2) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12 +; ZVFHMIN-NEXT: vfmadd.vv v14, v10, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMIN-NEXT: vse16.v v10, (a0) +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v14 +; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y @@ -1569,19 +1569,19 @@ define void @fmsub_v8bf16(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: fmsub_v8bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a2) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) ; CHECK-NEXT: lui a1, 8 -; CHECK-NEXT: vxor.vx v8, v8, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 +; CHECK-NEXT: vxor.vx v10, v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v12, v14 +; CHECK-NEXT: vfmadd.vv v10, v12, v14 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 -; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: 
vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <8 x bfloat>, ptr %x %b = load <8 x bfloat>, ptr %y @@ -1596,19 +1596,19 @@ define void @fmsub_v6bf16(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: fmsub_v6bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a2) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vle16.v v10, (a2) ; CHECK-NEXT: lui a1, 8 -; CHECK-NEXT: vxor.vx v8, v8, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 +; CHECK-NEXT: vxor.vx v10, v10, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v12, v14 +; CHECK-NEXT: vfmadd.vv v10, v12, v14 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v10, v8 -; CHECK-NEXT: vse16.v v10, (a0) +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 +; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <6 x bfloat>, ptr %x %b = load <6 x bfloat>, ptr %y @@ -1633,19 +1633,19 @@ define void @fmsub_v8f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fmsub_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a2) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vle16.v v10, (a1) +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vle16.v v10, (a2) ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vxor.vx v10, v10, a1 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14 +; ZVFHMIN-NEXT: vfmadd.vv v10, v12, v14 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMIN-NEXT: vse16.v v10, (a0) +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <8 x half>, ptr %x %b = load <8 x half>, ptr %y @@ -1670,19 +1670,19 @@ define void @fmsub_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fmsub_v6f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a2) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: vle16.v v10, (a1) +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vle16.v v10, (a2) ; ZVFHMIN-NEXT: lui a1, 8 -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 +; ZVFHMIN-NEXT: vxor.vx v10, v10, a1 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14 +; ZVFHMIN-NEXT: vfmadd.vv v10, v12, v14 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v10, v8 -; ZVFHMIN-NEXT: vse16.v v10, (a0) +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 +; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <6 x half>, ptr %x %b = load <6 x half>, ptr %y @@ -1736,10 +1736,10 @@ define void @fadd_v16bf16(ptr %x, ptr %y) { ; 
CHECK-LABEL: fadd_v16bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v10, (a0) -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfadd.vv v8, v16, v12 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -1766,10 +1766,10 @@ define void @fadd_v16f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: fadd_v16f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v10, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v10, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfadd.vv v8, v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -1819,10 +1819,10 @@ define void @fsub_v16bf16(ptr %x, ptr %y) { ; CHECK-LABEL: fsub_v16bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v10, (a0) -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfsub.vv v8, v16, v12 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -1849,10 +1849,10 @@ define void @fsub_v16f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: fsub_v16f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v10, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v10, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfsub.vv v8, v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -1902,10 +1902,10 @@ define void @fmul_v16bf16(ptr %x, ptr %y) { ; CHECK-LABEL: fmul_v16bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v10, (a0) -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfmul.vv v8, v16, v12 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -1932,10 +1932,10 @@ define void @fmul_v16f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: fmul_v16f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v10, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v10, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -1985,10 +1985,10 @@ define void 
@fdiv_v16bf16(ptr %x, ptr %y) { ; CHECK-LABEL: fdiv_v16bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v10, (a0) -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfdiv.vv v8, v16, v12 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -2015,10 +2015,10 @@ define void @fdiv_v16f16(ptr %x, ptr %y) { ; ZVFHMIN-LABEL: fdiv_v16f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v10, (a0) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v10, (a1) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfdiv.vv v8, v16, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma @@ -2134,17 +2134,17 @@ define void @fma_v16bf16(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: fma_v16bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vle16.v v8, (a2) -; CHECK-NEXT: vle16.v v10, (a0) -; CHECK-NEXT: vle16.v v12, (a1) -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v10, (a1) +; CHECK-NEXT: vle16.v v12, (a2) +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v12 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v10 -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v20, v16 +; CHECK-NEXT: vfmadd.vv v20, v12, v16 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfncvtbf16.f.f.w v12, v8 -; CHECK-NEXT: vse16.v v12, (a0) +; CHECK-NEXT: vfncvtbf16.f.f.w v8, v20 +; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x bfloat>, ptr %x %b = load <16 x bfloat>, ptr %y @@ -2168,17 +2168,17 @@ define void @fma_v16f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fma_v16f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a2) -; ZVFHMIN-NEXT: vle16.v v10, (a0) -; ZVFHMIN-NEXT: vle16.v v12, (a1) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v10, (a1) +; ZVFHMIN-NEXT: vle16.v v12, (a2) +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v20, v10 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vfmadd.vv v8, v20, v16 +; ZVFHMIN-NEXT: vfmadd.vv v20, v12, v16 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v8 -; ZVFHMIN-NEXT: vse16.v v12, (a0) +; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v20 +; ZVFHMIN-NEXT: vse16.v v8, (a0) ; ZVFHMIN-NEXT: ret %a = load <16 x half>, ptr %x %b = load <16 x half>, ptr %y @@ -3347,13 +3347,13 @@ define void @fdiv_fv_v2f64(ptr %x, double %y) { define void @fma_vf_v8bf16(ptr %x, ptr %y, bfloat %z) { ; CHECK-LABEL: fma_vf_v8bf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.w a2, fa0 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: fmv.x.w a1, fa0 -; CHECK-NEXT: vmv.v.x v10, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: 
vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vmv.v.x v10, a2 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v14, v12 @@ -3373,13 +3373,13 @@ define void @fma_vf_v8bf16(ptr %x, ptr %y, bfloat %z) { define void @fma_vf_v6bf16(ptr %x, ptr %y, bfloat %z) { ; CHECK-LABEL: fma_vf_v6bf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.w a2, fa0 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: fmv.x.w a1, fa0 -; CHECK-NEXT: vmv.v.x v10, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vmv.v.x v10, a2 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v14, v12 @@ -3408,13 +3408,13 @@ define void @fma_vf_v8f16(ptr %x, ptr %y, half %z) { ; ; ZVFHMIN-LABEL: fma_vf_v8f16: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a2, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vmv.v.x v10, a2 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12 @@ -3443,13 +3443,13 @@ define void @fma_vf_v6f16(ptr %x, ptr %y, half %z) { ; ; ZVFHMIN-LABEL: fma_vf_v6f16: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a2, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vmv.v.x v10, a2 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12 @@ -3505,13 +3505,13 @@ define void @fma_vf_v2f64(ptr %x, ptr %y, double %z) { define void @fma_fv_v8bf16(ptr %x, ptr %y, bfloat %z) { ; CHECK-LABEL: fma_fv_v8bf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.w a2, fa0 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: fmv.x.w a1, fa0 -; CHECK-NEXT: vmv.v.x v10, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vmv.v.x v10, a2 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v14, v12 @@ -3531,13 +3531,13 @@ define void @fma_fv_v8bf16(ptr %x, ptr %y, bfloat %z) { define void @fma_fv_v6bf16(ptr %x, ptr %y, bfloat %z) { ; CHECK-LABEL: fma_fv_v6bf16: ; CHECK: # %bb.0: +; CHECK-NEXT: fmv.x.w a2, fa0 ; CHECK-NEXT: 
vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: fmv.x.w a1, fa0 -; CHECK-NEXT: vmv.v.x v10, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) +; CHECK-NEXT: vmv.v.x v10, a2 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v14, v12 @@ -3566,13 +3566,13 @@ define void @fma_fv_v8f16(ptr %x, ptr %y, half %z) { ; ; ZVFHMIN-LABEL: fma_fv_v8f16: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a2, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vmv.v.x v10, a2 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12 @@ -3601,13 +3601,13 @@ define void @fma_fv_v6f16(ptr %x, ptr %y, half %z) { ; ; ZVFHMIN-LABEL: fma_fv_v6f16: ; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: fmv.x.w a2, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) -; ZVFHMIN-NEXT: fmv.x.w a1, fa0 -; ZVFHMIN-NEXT: vmv.v.x v10, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) +; ZVFHMIN-NEXT: vmv.v.x v10, a2 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v8, v14, v12 @@ -3665,13 +3665,13 @@ define void @fmsub_vf_v8bf16(ptr %x, ptr %y, bfloat %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.w a2, fa0 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: vmv.v.x v10, a2 -; CHECK-NEXT: vxor.vx v8, v8, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 +; CHECK-NEXT: vxor.vx v9, v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v12, v14 @@ -3694,13 +3694,13 @@ define void @fmsub_vf_v6bf16(ptr %x, ptr %y, bfloat %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: fmv.x.w a2, fa0 ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: vmv.v.x v10, a2 -; CHECK-NEXT: vxor.vx v8, v8, a1 -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 +; CHECK-NEXT: vxor.vx v9, v9, a1 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 ; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmadd.vv v8, v12, v14 @@ -3732,13 +3732,13 @@ define void @fmsub_vf_v8f16(ptr %x, ptr %y, half %z) { ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.w a2, 
fa0 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vmv.v.x v10, a2 -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 +; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14 @@ -3770,13 +3770,13 @@ define void @fmsub_vf_v6f16(ptr %x, ptr %y, half %z) { ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: fmv.x.w a2, fa0 ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: lui a1, 8 ; ZVFHMIN-NEXT: vmv.v.x v10, a2 -; ZVFHMIN-NEXT: vxor.vx v8, v8, a1 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 +; ZVFHMIN-NEXT: vxor.vx v9, v9, a1 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v10 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmadd.vv v8, v12, v14 @@ -4057,11 +4057,11 @@ define void @ceil_v8bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4084,12 +4084,12 @@ define void @ceil_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 @@ -4113,9 +4113,9 @@ define void @ceil_v8f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI177_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI177_0)(a1) +; ZVFH-NEXT: fsrmi a1, 3 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: fsrmi a1, 3 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4130,11 +4130,11 @@ define void @ceil_v8f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 3 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a1, 3 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4157,10 +4157,10 @@ define void @ceil_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI178_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI178_0)(a1) +; ZVFH-NEXT: fsrmi a1, 3 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: fsrmi a1, 3 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v 
v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 @@ -4176,12 +4176,12 @@ define void @ceil_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 3 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a1, 3 ; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 @@ -4205,9 +4205,9 @@ define void @ceil_v4f32(ptr %x) { ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4228,9 +4228,9 @@ define void @ceil_v2f64(ptr %x) { ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI180_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI180_0)(a1) +; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 3 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4251,11 +4251,11 @@ define void @floor_v8bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4278,12 +4278,12 @@ define void @floor_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 @@ -4307,9 +4307,9 @@ define void @floor_v8f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI183_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI183_0)(a1) +; ZVFH-NEXT: fsrmi a1, 2 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: fsrmi a1, 2 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4324,11 +4324,11 @@ define void @floor_v8f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 2 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a1, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4351,10 +4351,10 @@ define void @floor_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI184_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI184_0)(a1) +; ZVFH-NEXT: fsrmi a1, 2 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: fsrmi a1, 2 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, 
v8, v0.t ; ZVFH-NEXT: fsrm a1 @@ -4370,12 +4370,12 @@ define void @floor_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 2 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a1, 2 ; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 @@ -4399,9 +4399,9 @@ define void @floor_v4f32(ptr %x) { ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4422,9 +4422,9 @@ define void @floor_v2f64(ptr %x) { ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI186_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI186_0)(a1) +; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 2 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4445,11 +4445,11 @@ define void @round_v8bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4472,12 +4472,12 @@ define void @round_v6bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a1 @@ -4501,9 +4501,9 @@ define void @round_v8f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI189_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI189_0)(a1) +; ZVFH-NEXT: fsrmi a1, 4 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: fsrmi a1, 4 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4518,11 +4518,11 @@ define void @round_v8f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 4 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a1, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -4545,10 +4545,10 @@ define void @round_v6f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI190_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI190_0)(a1) +; ZVFH-NEXT: fsrmi a1, 4 ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: fsrmi a1, 4 ; ZVFH-NEXT: vsetivli zero, 6, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, 
v8, v0.t ; ZVFH-NEXT: fsrm a1 @@ -4564,12 +4564,12 @@ define void @round_v6f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 4 ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a1, 4 ; ZVFHMIN-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a1 @@ -4593,9 +4593,9 @@ define void @round_v4f32(ptr %x) { ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4616,9 +4616,9 @@ define void @round_v2f64(ptr %x) { ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI192_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI192_0)(a1) +; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -4746,11 +4746,11 @@ define void @nearbyint_v8bf16(ptr %x) { ; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: fsflags a1 @@ -4773,9 +4773,9 @@ define void @nearbyint_v8f16(ptr %x) { ; ZVFH-NEXT: vle16.v v8, (a0) ; ZVFH-NEXT: lui a1, %hi(.LCPI198_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI198_0)(a1) +; ZVFH-NEXT: frflags a1 ; ZVFH-NEXT: vfabs.v v9, v8 ; ZVFH-NEXT: vmflt.vf v0, v9, fa5 -; ZVFH-NEXT: frflags a1 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t ; ZVFH-NEXT: fsflags a1 @@ -4790,11 +4790,11 @@ define void @nearbyint_v8f16(ptr %x) { ; ZVFHMIN-NEXT: vle16.v v8, (a0) ; ZVFHMIN-NEXT: lui a1, 307200 ; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: frflags a1 ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: frflags a1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: fsflags a1 @@ -4817,9 +4817,9 @@ define void @nearbyint_v4f32(ptr %x) { ; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: lui a1, 307200 ; CHECK-NEXT: fmv.w.x fa5, a1 +; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a1 @@ -4840,9 +4840,9 @@ define void @nearbyint_v2f64(ptr %x) { ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI200_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI200_0)(a1) +; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t ; CHECK-NEXT: fsflags a1 @@ -4860,11 +4860,11 @@ define void @fmuladd_v8bf16(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: fmuladd_v8bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: 
vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v14, v12 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -4889,11 +4889,11 @@ define void @fmuladd_v6bf16(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: fmuladd_v6bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v14, v12 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -4928,11 +4928,11 @@ define void @fmuladd_v8f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fmuladd_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: vle16.v v10, (a2) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -4967,11 +4967,11 @@ define void @fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fmuladd_v6f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: vle16.v v10, (a2) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -5032,11 +5032,11 @@ define void @fmsub_fmuladd_v8bf16(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: fmsub_fmuladd_v8bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v14, v12 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -5062,11 +5062,11 @@ define void @fmsub_fmuladd_v6bf16(ptr %x, ptr %y, ptr %z) { ; CHECK-LABEL: fmsub_fmuladd_v6bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vle16.v v9, (a0) +; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 -; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v14, 
v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfmul.vv v8, v14, v12 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -5102,11 +5102,11 @@ define void @fmsub_fmuladd_v8f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fmsub_fmuladd_v8f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: vle16.v v10, (a2) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma @@ -5142,11 +5142,11 @@ define void @fmsub_fmuladd_v6f16(ptr %x, ptr %y, ptr %z) { ; ZVFHMIN-LABEL: fmsub_fmuladd_v6f16: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; ZVFHMIN-NEXT: vle16.v v8, (a1) -; ZVFHMIN-NEXT: vle16.v v9, (a0) +; ZVFHMIN-NEXT: vle16.v v8, (a0) +; ZVFHMIN-NEXT: vle16.v v9, (a1) ; ZVFHMIN-NEXT: vle16.v v10, (a2) -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 -; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v14, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfmul.vv v8, v14, v12 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll index a1466d46f1ba7..5106ec1189327 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpext-vp.ll @@ -96,9 +96,9 @@ declare <32 x double> @llvm.vp.fpext.v32f64.v32f32(<32 x float>, <32 x i1>, i32) define <32 x double> @vfpext_v32f32_v32f64(<32 x float> %a, <32 x i1> %m, i32 zeroext %vl) { ; CHECK-LABEL: vfpext_v32f32_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll index c6b8b602718b7..c18d8639dc91c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fpowi.ll @@ -743,8 +743,8 @@ define <16 x float> @powi_v16f32(<16 x float> %x, i32 %y) nounwind { ; RV64-NEXT: addi a1, sp, 64 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vse32.v v8, (a1) -; RV64-NEXT: flw fa0, 124(sp) ; RV64-NEXT: sext.w s2, a0 +; RV64-NEXT: flw fa0, 124(sp) ; RV64-NEXT: mv a0, s2 ; RV64-NEXT: call __powisf2 ; RV64-NEXT: fsw fa0, 188(sp) @@ -1188,8 +1188,8 @@ define <8 x double> @powi_v8f64(<8 x double> %x, i32 %y) nounwind { ; RV64-NEXT: addi a1, sp, 64 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vse64.v v8, (a1) -; RV64-NEXT: fld fa0, 120(sp) ; RV64-NEXT: sext.w s2, a0 +; RV64-NEXT: fld fa0, 120(sp) ; RV64-NEXT: mv a0, s2 ; RV64-NEXT: call __powidf2 ; RV64-NEXT: fsd fa0, 184(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll index f6c992280c6e3..e4609f1e9313d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptosi-vp.ll @@ -394,9 +394,9 @@ declare <32 x i64> 
@llvm.vp.fptosi.v32i64.v32f64(<32 x double>, <32 x i1>, i32) define <32 x i64> @vfptosi_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptosi_v32i64_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB25_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll index af225f4d95aa2..846675cf5a9b4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptoui-vp.ll @@ -394,9 +394,9 @@ declare <32 x i64> @llvm.vp.fptoui.v32i64.v32f64(<32 x double>, <32 x i1>, i32) define <32 x i64> @vfptoui_v32i64_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfptoui_v32i64_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB25_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll index 582706e4dfa18..ae53abc3f8c9a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fptrunc-vp.ll @@ -99,8 +99,8 @@ define <32 x float> @vfptrunc_v32f32_v32f64(<32 x double> %a, <32 x i1> %m, i32 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vslidedown.vi v12, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB7_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll index be32c033fe373..751a6e45c0c3c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround-constrained-sdnode.ll @@ -12,11 +12,11 @@ define <1 x half> @round_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -35,11 +35,11 @@ define <2 x half> @round_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -58,11 +58,11 @@ define <4 x half> @round_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: 
vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -81,11 +81,11 @@ define <8 x half> @round_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -104,11 +104,11 @@ define <16 x half> @round_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -130,9 +130,9 @@ define <32 x half> @round_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -153,9 +153,9 @@ define <1 x float> @round_v1f32(<1 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -176,9 +176,9 @@ define <2 x float> @round_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -199,9 +199,9 @@ define <4 x float> @round_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -222,9 +222,9 @@ define <8 x float> @round_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -245,9 +245,9 @@ define <16 x float> @round_v16f32(<16 x float> %x) strictfp { ; 
CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -266,11 +266,11 @@ define <1 x double> @round_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -289,11 +289,11 @@ define <2 x double> @round_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -312,11 +312,11 @@ define <4 x double> @round_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -335,11 +335,11 @@ define <8 x double> @round_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll index 774ce5c7859c9..2bf3e9596597d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fround.ll @@ -13,12 +13,12 @@ define <1 x half> @round_v1f16(<1 x half> %x) { ; ZVFH-LABEL: round_v1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI0_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,11 +31,11 @@ define <1 x half> @round_v1f16(<1 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 
307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -52,12 +52,12 @@ declare <1 x half> @llvm.round.v1f16(<1 x half>) define <2 x half> @round_v2f16(<2 x half> %x) { ; ZVFH-LABEL: round_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI1_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -70,11 +70,11 @@ define <2 x half> @round_v2f16(<2 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -91,12 +91,12 @@ declare <2 x half> @llvm.round.v2f16(<2 x half>) define <4 x half> @round_v4f16(<4 x half> %x) { ; ZVFH-LABEL: round_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI2_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -109,11 +109,11 @@ define <4 x half> @round_v4f16(<4 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -130,12 +130,12 @@ declare <4 x half> @llvm.round.v4f16(<4 x half>) define <8 x half> @round_v8f16(<8 x half> %x) { ; ZVFH-LABEL: round_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI3_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -148,11 +148,11 @@ define <8 x half> @round_v8f16(<8 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; 
ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -169,12 +169,12 @@ declare <8 x half> @llvm.round.v8f16(<8 x half>) define <16 x half> @round_v16f16(<16 x half> %x) { ; ZVFH-LABEL: round_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -187,11 +187,11 @@ define <16 x half> @round_v16f16(<16 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -208,15 +208,15 @@ declare <16 x half> @llvm.round.v16f16(<16 x half>) define <32 x half> @round_v32f16(<32 x half> %x) { ; ZVFH-LABEL: round_v32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; ZVFH-NEXT: li a0, 32 +; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: fsrmi a1, 4 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 ; ZVFH-NEXT: vmflt.vf v0, v12, fa5 -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t -; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t @@ -226,15 +226,15 @@ define <32 x half> @round_v32f16(<32 x half> %x) { ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: li a0, 32 ; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 4 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t -; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t @@ -253,8 +253,8 @@ define <1 x float> @round_v1f32(<1 x float> %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -273,8 +273,8 @@ define <2 x float> @round_v2f32(<2 x float> %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -293,8 +293,8 @@ define <4 x float> @round_v4f32(<4 x float> %x) { ; CHECK-NEXT: vfabs.v v9, 
v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -313,8 +313,8 @@ define <8 x float> @round_v8f32(<8 x float> %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -333,8 +333,8 @@ define <16 x float> @round_v16f32(<16 x float> %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -349,12 +349,12 @@ declare <16 x float> @llvm.round.v16f32(<16 x float>) define <1 x double> @round_v1f64(<1 x double> %x) { ; CHECK-LABEL: round_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -369,12 +369,12 @@ declare <1 x double> @llvm.round.v1f64(<1 x double>) define <2 x double> @round_v2f64(<2 x double> %x) { ; CHECK-LABEL: round_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -389,12 +389,12 @@ declare <2 x double> @llvm.round.v2f64(<2 x double>) define <4 x double> @round_v4f64(<4 x double> %x) { ; CHECK-LABEL: round_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -409,12 +409,12 @@ declare <4 x double> @llvm.round.v4f64(<4 x double>) define <8 x double> @round_v8f64(<8 x double> %x) { ; CHECK-LABEL: round_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll index 5c0279e133dfa..c61e707bd89f0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven-constrained-sdnode.ll @@ -12,11 +12,11 @@ define <1 x half> @roundeven_v1f16(<1 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -35,11 +35,11 @@ define <2 x half> @roundeven_v2f16(<2 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -58,11 +58,11 @@ define <4 x half> @roundeven_v4f16(<4 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -81,11 +81,11 @@ define <8 x half> @roundeven_v8f16(<8 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -104,11 +104,11 @@ define <16 x half> @roundeven_v16f16(<16 x half> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -130,9 +130,9 @@ define <32 x half> @roundeven_v32f16(<32 x half> %x) strictfp { ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -153,9 +153,9 @@ define <1 x float> @roundeven_v1f32(<1 x float> 
%x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -176,9 +176,9 @@ define <2 x float> @roundeven_v2f32(<2 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -199,9 +199,9 @@ define <4 x float> @roundeven_v4f32(<4 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -222,9 +222,9 @@ define <8 x float> @roundeven_v8f32(<8 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -245,9 +245,9 @@ define <16 x float> @roundeven_v16f32(<16 x float> %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -266,11 +266,11 @@ define <1 x double> @roundeven_v1f64(<1 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -289,11 +289,11 @@ define <2 x double> @roundeven_v2f64(<2 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -312,11 +312,11 @@ define <4 x double> @roundeven_v4f64(<4 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; 
CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -335,11 +335,11 @@ define <8 x double> @roundeven_v8f64(<8 x double> %x) strictfp { ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll index 0b6baad127643..697fc657af5d1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-froundeven.ll @@ -13,12 +13,12 @@ define <1 x half> @roundeven_v1f16(<1 x half> %x) { ; ZVFH-LABEL: roundeven_v1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; ZVFH-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI0_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -31,11 +31,11 @@ define <1 x half> @roundeven_v1f16(<1 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 1, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -52,12 +52,12 @@ declare <1 x half> @llvm.roundeven.v1f16(<1 x half>) define <2 x half> @roundeven_v2f16(<2 x half> %x) { ; ZVFH-LABEL: roundeven_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI1_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -70,11 +70,11 @@ define <2 x half> @roundeven_v2f16(<2 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -91,12 +91,12 @@ declare <2 x half> @llvm.roundeven.v2f16(<2 x half>) define <4 x half> @roundeven_v4f16(<4 x half> %x) { ; ZVFH-LABEL: roundeven_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; ZVFH-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: 
vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI2_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -109,11 +109,11 @@ define <4 x half> @roundeven_v4f16(<4 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -130,12 +130,12 @@ declare <4 x half> @llvm.roundeven.v4f16(<4 x half>) define <8 x half> @roundeven_v8f16(<8 x half> %x) { ; ZVFH-LABEL: roundeven_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI3_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -148,11 +148,11 @@ define <8 x half> @roundeven_v8f16(<8 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -169,12 +169,12 @@ declare <8 x half> @llvm.roundeven.v8f16(<8 x half>) define <16 x half> @roundeven_v16f16(<16 x half> %x) { ; ZVFH-LABEL: roundeven_v16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; ZVFH-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -187,11 +187,11 @@ define <16 x half> @roundeven_v16f16(<16 x half> %x) { ; ZVFHMIN-NEXT: vsetivli zero, 16, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -208,15 +208,15 @@ declare <16 x half> @llvm.roundeven.v16f16(<16 x half>) define <32 x half> @roundeven_v32f16(<32 x half> %x) { ; ZVFH-LABEL: roundeven_v32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; ZVFH-NEXT: li a0, 32 +; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) +; ZVFH-NEXT: fsrmi a1, 0 ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 ; 
ZVFH-NEXT: vmflt.vf v0, v12, fa5 -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t -; ZVFH-NEXT: fsrm a0 +; ZVFH-NEXT: fsrm a1 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t @@ -226,15 +226,15 @@ define <32 x half> @roundeven_v32f16(<32 x half> %x) { ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: li a0, 32 ; ZVFHMIN-NEXT: lui a1, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a1 +; ZVFHMIN-NEXT: fsrmi a1, 0 ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 -; ZVFHMIN-NEXT: fmv.w.x fa5, a1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t -; ZVFHMIN-NEXT: fsrm a0 +; ZVFHMIN-NEXT: fsrm a1 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t @@ -253,8 +253,8 @@ define <1 x float> @roundeven_v1f32(<1 x float> %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -273,8 +273,8 @@ define <2 x float> @roundeven_v2f32(<2 x float> %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -293,8 +293,8 @@ define <4 x float> @roundeven_v4f32(<4 x float> %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -313,8 +313,8 @@ define <8 x float> @roundeven_v8f32(<8 x float> %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -333,8 +333,8 @@ define <16 x float> @roundeven_v16f32(<16 x float> %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -349,12 +349,12 @@ declare <16 x float> @llvm.roundeven.v16f32(<16 x float>) define <1 x double> @roundeven_v1f64(<1 x double> %x) { ; CHECK-LABEL: roundeven_v1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI11_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -369,12 +369,12 @@ declare <1 x double> @llvm.roundeven.v1f64(<1 x double>) define <2 x double> @roundeven_v2f64(<2 x double> 
%x) { ; CHECK-LABEL: roundeven_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI12_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -389,12 +389,12 @@ declare <2 x double> @llvm.roundeven.v2f64(<2 x double>) define <4 x double> @roundeven_v4f64(<4 x double> %x) { ; CHECK-LABEL: roundeven_v4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI13_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -409,12 +409,12 @@ declare <4 x double> @llvm.roundeven.v4f64(<4 x double>) define <8 x double> @roundeven_v8f64(<8 x double> %x) { ; CHECK-LABEL: roundeven_v8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI14_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll index 62e7e3b109902..82d740d3113eb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll @@ -285,14 +285,14 @@ define void @insert_v8i32_v2i32_0(ptr %vp, ptr %svp) { define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) { ; VLA-LABEL: insert_v8i32_v2i32_2: ; VLA: # %bb.0: -; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; VLA-NEXT: vle32.v v8, (a0) ; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; VLA-NEXT: vle32.v v10, (a1) +; VLA-NEXT: vle32.v v8, (a1) +; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; VLA-NEXT: vle32.v v10, (a0) ; VLA-NEXT: vsetivli zero, 4, e32, m2, tu, ma -; VLA-NEXT: vslideup.vi v8, v10, 2 +; VLA-NEXT: vslideup.vi v10, v8, 2 ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; VLA-NEXT: vse32.v v8, (a0) +; VLA-NEXT: vse32.v v10, (a0) ; VLA-NEXT: ret ; ; VLS-LABEL: insert_v8i32_v2i32_2: @@ -314,13 +314,12 @@ define void @insert_v8i32_v2i32_2(ptr %vp, ptr %svp) { define void @insert_v8i32_v2i32_6(ptr %vp, ptr %svp) { ; VLA-LABEL: insert_v8i32_v2i32_6: ; VLA: # %bb.0: -; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; VLA-NEXT: vle32.v v8, (a0) ; VLA-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; VLA-NEXT: vle32.v v10, (a1) +; VLA-NEXT: vle32.v v8, (a1) ; VLA-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; VLA-NEXT: vslideup.vi v8, v10, 6 -; VLA-NEXT: vse32.v v8, (a0) +; VLA-NEXT: vle32.v v10, (a0) +; VLA-NEXT: vslideup.vi v10, v8, 6 +; VLA-NEXT: vse32.v v10, (a0) ; VLA-NEXT: ret ; ; VLS-LABEL: insert_v8i32_v2i32_6: @@ -830,13 +829,13 @@ define void @insert_v2i64_nxv16i64_hi(ptr %psv, ptr %out) { ; RV32VLS-NEXT: vl1re64.v v8, (a0) ; RV32VLS-NEXT: 
addi a0, sp, 128 ; RV32VLS-NEXT: vs1r.v v8, (a0) -; RV32VLS-NEXT: addi a0, sp, 192 -; RV32VLS-NEXT: vl8re64.v v8, (a0) ; RV32VLS-NEXT: addi a0, sp, 64 +; RV32VLS-NEXT: vl8re64.v v8, (a0) +; RV32VLS-NEXT: addi a0, sp, 192 ; RV32VLS-NEXT: vl8re64.v v16, (a0) ; RV32VLS-NEXT: addi a0, a1, 128 -; RV32VLS-NEXT: vs8r.v v8, (a0) -; RV32VLS-NEXT: vs8r.v v16, (a1) +; RV32VLS-NEXT: vs8r.v v16, (a0) +; RV32VLS-NEXT: vs8r.v v8, (a1) ; RV32VLS-NEXT: addi sp, s0, -80 ; RV32VLS-NEXT: .cfi_def_cfa sp, 80 ; RV32VLS-NEXT: lw ra, 76(sp) # 4-byte Folded Reload @@ -862,13 +861,13 @@ define void @insert_v2i64_nxv16i64_hi(ptr %psv, ptr %out) { ; RV64VLS-NEXT: vl1re64.v v8, (a0) ; RV64VLS-NEXT: addi a0, sp, 128 ; RV64VLS-NEXT: vs1r.v v8, (a0) -; RV64VLS-NEXT: addi a0, sp, 192 -; RV64VLS-NEXT: vl8re64.v v8, (a0) ; RV64VLS-NEXT: addi a0, sp, 64 +; RV64VLS-NEXT: vl8re64.v v8, (a0) +; RV64VLS-NEXT: addi a0, sp, 192 ; RV64VLS-NEXT: vl8re64.v v16, (a0) ; RV64VLS-NEXT: addi a0, a1, 128 -; RV64VLS-NEXT: vs8r.v v8, (a0) -; RV64VLS-NEXT: vs8r.v v16, (a1) +; RV64VLS-NEXT: vs8r.v v16, (a0) +; RV64VLS-NEXT: vs8r.v v8, (a1) ; RV64VLS-NEXT: addi sp, s0, -80 ; RV64VLS-NEXT: .cfi_def_cfa sp, 80 ; RV64VLS-NEXT: ld ra, 72(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll index 6782b2003ba94..ae0736682c9dd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert.ll @@ -542,11 +542,11 @@ define void @insertelt_c6_v8i64_0_add(ptr %x, ptr %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vle64.v v12, (a1) -; CHECK-NEXT: li a1, 6 +; CHECK-NEXT: li a2, 6 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, tu, ma -; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma +; CHECK-NEXT: vle64.v v12, (a1) ; CHECK-NEXT: vadd.vv v8, v8, v12 ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index c628a0d620498..b8e299e67fc04 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -530,10 +530,10 @@ define void @buildvec_dominant0_v2i32(ptr %x) { ; ; RV64V-LABEL: buildvec_dominant0_v2i32: ; RV64V: # %bb.0: -; RV64V-NEXT: lui a1, %hi(.LCPI40_0) -; RV64V-NEXT: ld a1, %lo(.LCPI40_0)(a1) ; RV64V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64V-NEXT: vmv.v.i v8, -1 +; RV64V-NEXT: lui a1, %hi(.LCPI40_0) +; RV64V-NEXT: ld a1, %lo(.LCPI40_0)(a1) ; RV64V-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV64V-NEXT: vmv.s.x v8, a1 ; RV64V-NEXT: vse64.v v8, (a0) @@ -698,15 +698,16 @@ define void @buildvec_seq_v9i8(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 73 ; CHECK-NEXT: vsetivli zero, 9, e8, m1, ta, ma -; CHECK-NEXT: vmv.v.i v9, 3 +; CHECK-NEXT: vmv.v.i v8, 3 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: li a1, 146 -; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v9, 2, v0 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 ; CHECK-NEXT: vse8.v v8, (a0) 
; CHECK-NEXT: ret store <9 x i8> , ptr %x @@ -935,11 +936,13 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_1() vscale_range(16, ; RV32-NEXT: li a0, 512 ; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma ; RV32-NEXT: vid.v v8 +; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; RV32-NEXT: vmv.v.i v12, 1 +; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma ; RV32-NEXT: vsrl.vi v8, v8, 3 ; RV32-NEXT: vadd.vi v0, v8, -1 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV32-NEXT: vmv.v.i v8, 1 -; RV32-NEXT: vmerge.vim v8, v8, 0, v0 +; RV32-NEXT: vmerge.vim v8, v12, 0, v0 ; RV32-NEXT: ret ; ; RV64V-LABEL: buildvec_not_vid_v512i8_indices_overflow_1: @@ -947,11 +950,13 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_1() vscale_range(16, ; RV64V-NEXT: li a0, 512 ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64V-NEXT: vid.v v8 +; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; RV64V-NEXT: vmv.v.i v12, 1 +; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; RV64V-NEXT: vsrl.vi v8, v8, 2 ; RV64V-NEXT: vadd.vi v0, v8, -1 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64V-NEXT: vmv.v.i v8, 1 -; RV64V-NEXT: vmerge.vim v8, v8, 0, v0 +; RV64V-NEXT: vmerge.vim v8, v12, 0, v0 ; RV64V-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_not_vid_v512i8_indices_overflow_1: @@ -959,11 +964,13 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_1() vscale_range(16, ; RV64ZVE32-NEXT: li a0, 512 ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; RV64ZVE32-NEXT: vid.v v8 +; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma +; RV64ZVE32-NEXT: vmv.v.i v12, 1 +; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; RV64ZVE32-NEXT: vsrl.vi v8, v8, 3 ; RV64ZVE32-NEXT: vadd.vi v0, v8, -1 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma -; RV64ZVE32-NEXT: vmv.v.i v8, 1 -; RV64ZVE32-NEXT: vmerge.vim v8, v8, 0, v0 +; RV64ZVE32-NEXT: vmerge.vim v8, v12, 0, v0 ; RV64ZVE32-NEXT: ret ret <512 x i8> } @@ -973,27 +980,27 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma ; RV32-NEXT: vmv.v.i v0, 15 -; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: li a0, 512 ; RV32-NEXT: li a1, 240 -; RV32-NEXT: vmv.s.x v8, a1 -; RV32-NEXT: li a1, 15 -; RV32-NEXT: vmerge.vim v10, v9, -1, v0 +; RV32-NEXT: vmerge.vim v9, v8, -1, v0 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmv.v.i v12, 3 -; RV32-NEXT: slli a1, a1, 8 -; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmerge.vim v12, v12, 0, v0 -; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; RV32-NEXT: vmv.s.x v0, a1 +; RV32-NEXT: li a1, 15 +; RV32-NEXT: slli a1, a1, 8 ; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma -; RV32-NEXT: vmerge.vim v10, v9, -1, v0 -; RV32-NEXT: vmv.s.x v8, a1 -; RV32-NEXT: vmv1r.v v0, v10 +; RV32-NEXT: vmerge.vim v9, v8, -1, v0 +; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmerge.vim v12, v12, 1, v0 -; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: vsetivli zero, 16, e32, mf2, ta, ma -; RV32-NEXT: vmerge.vim v8, v9, -1, v0 +; RV32-NEXT: vmerge.vim v8, v8, -1, v0 ; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV32-NEXT: vmerge.vim v8, v12, 2, v0 @@ -1003,25 +1010,23 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, ; RV64V: # %bb.0: ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma ; 
RV64V-NEXT: vmv.v.i v0, 3 -; RV64V-NEXT: vmv.v.i v9, 0 +; RV64V-NEXT: vmv.v.i v8, 0 ; RV64V-NEXT: li a0, 512 -; RV64V-NEXT: vmv.v.i v8, 12 -; RV64V-NEXT: li a1, 48 -; RV64V-NEXT: vmerge.vim v10, v9, -1, v0 +; RV64V-NEXT: vmerge.vim v9, v8, -1, v0 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmv.v.i v12, 3 -; RV64V-NEXT: vmv1r.v v0, v10 +; RV64V-NEXT: vmv1r.v v0, v9 ; RV64V-NEXT: vmerge.vim v12, v12, 0, v0 -; RV64V-NEXT: vmv1r.v v0, v8 ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; RV64V-NEXT: vmerge.vim v10, v9, -1, v0 -; RV64V-NEXT: vmv.s.x v8, a1 -; RV64V-NEXT: vmv.v.v v0, v10 +; RV64V-NEXT: vmv.v.i v0, 12 +; RV64V-NEXT: li a1, 48 +; RV64V-NEXT: vmerge.vim v9, v8, -1, v0 +; RV64V-NEXT: vmv.v.v v0, v9 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmerge.vim v12, v12, 1, v0 -; RV64V-NEXT: vmv1r.v v0, v8 +; RV64V-NEXT: vmv.s.x v0, a1 ; RV64V-NEXT: vsetivli zero, 8, e64, m1, ta, ma -; RV64V-NEXT: vmerge.vim v8, v9, -1, v0 +; RV64V-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64V-NEXT: vmv.v.v v0, v8 ; RV64V-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64V-NEXT: vmerge.vim v8, v12, 2, v0 @@ -1031,27 +1036,27 @@ define <512 x i8> @buildvec_not_vid_v512i8_indices_overflow_2() vscale_range(16, ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.i v0, 15 -; RV64ZVE32-NEXT: vmv.v.i v9, 0 +; RV64ZVE32-NEXT: vmv.v.i v8, 0 ; RV64ZVE32-NEXT: li a0, 512 ; RV64ZVE32-NEXT: li a1, 240 -; RV64ZVE32-NEXT: vmv.s.x v8, a1 -; RV64ZVE32-NEXT: li a1, 15 -; RV64ZVE32-NEXT: vmerge.vim v10, v9, -1, v0 +; RV64ZVE32-NEXT: vmerge.vim v9, v8, -1, v0 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64ZVE32-NEXT: vmv.v.i v12, 3 -; RV64ZVE32-NEXT: slli a1, a1, 8 -; RV64ZVE32-NEXT: vmv1r.v v0, v10 +; RV64ZVE32-NEXT: vmv1r.v v0, v9 ; RV64ZVE32-NEXT: vmerge.vim v12, v12, 0, v0 -; RV64ZVE32-NEXT: vmv1r.v v0, v8 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a1 +; RV64ZVE32-NEXT: li a1, 15 +; RV64ZVE32-NEXT: slli a1, a1, 8 ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma -; RV64ZVE32-NEXT: vmerge.vim v10, v9, -1, v0 -; RV64ZVE32-NEXT: vmv.s.x v8, a1 -; RV64ZVE32-NEXT: vmv.v.v v0, v10 +; RV64ZVE32-NEXT: vmerge.vim v9, v8, -1, v0 +; RV64ZVE32-NEXT: vmv.v.v v0, v9 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64ZVE32-NEXT: vmerge.vim v12, v12, 1, v0 -; RV64ZVE32-NEXT: vmv1r.v v0, v8 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m8, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a1 ; RV64ZVE32-NEXT: vsetivli zero, 16, e32, m1, ta, ma -; RV64ZVE32-NEXT: vmerge.vim v8, v9, -1, v0 +; RV64ZVE32-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64ZVE32-NEXT: vmv.v.v v0, v8 ; RV64ZVE32-NEXT: vsetvli zero, a0, e8, m4, ta, ma ; RV64ZVE32-NEXT: vmerge.vim v8, v12, 2, v0 @@ -1358,15 +1363,13 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV32-ONLY-NEXT: lbu t2, 9(a0) ; RV32-ONLY-NEXT: lbu t3, 10(a0) ; RV32-ONLY-NEXT: lbu t4, 11(a0) -; RV32-ONLY-NEXT: li t5, 255 -; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-ONLY-NEXT: vmv.s.x v0, t5 ; RV32-ONLY-NEXT: lbu t5, 12(a0) ; RV32-ONLY-NEXT: lbu t6, 13(a0) ; RV32-ONLY-NEXT: lbu s0, 14(a0) ; RV32-ONLY-NEXT: lbu a0, 15(a0) -; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a1 +; RV32-ONLY-NEXT: li a1, 255 ; RV32-ONLY-NEXT: vmv.v.x v9, t1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t2 @@ -1382,6 +1385,9 @@ define <16 x i8> 
@buildvec_v16i8_loads_contigous(ptr %p) { ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, s0 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, t0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a0 +; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-ONLY-NEXT: vmv.s.x v0, a1 +; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-ONLY-NEXT: .cfi_restore s0 @@ -1417,24 +1423,24 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV32VB-NEXT: slli t1, t1, 24 ; RV32VB-NEXT: or a7, t0, a7 ; RV32VB-NEXT: or a4, a4, a5 -; RV32VB-NEXT: lbu a5, 12(a0) +; RV32VB-NEXT: or a5, t1, a6 +; RV32VB-NEXT: lbu a6, 12(a0) ; RV32VB-NEXT: lbu t0, 13(a0) -; RV32VB-NEXT: or a6, t1, a6 ; RV32VB-NEXT: lbu t1, 14(a0) ; RV32VB-NEXT: lbu a0, 15(a0) ; RV32VB-NEXT: slli t0, t0, 8 -; RV32VB-NEXT: or a5, a5, t0 +; RV32VB-NEXT: or a6, a6, t0 ; RV32VB-NEXT: slli t1, t1, 16 ; RV32VB-NEXT: slli a0, a0, 24 ; RV32VB-NEXT: or a0, a0, t1 ; RV32VB-NEXT: or a1, a1, a3 ; RV32VB-NEXT: or a2, a2, a7 -; RV32VB-NEXT: or a3, a4, a6 -; RV32VB-NEXT: or a0, a5, a0 +; RV32VB-NEXT: or a4, a4, a5 +; RV32VB-NEXT: or a0, a6, a0 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2 -; RV32VB-NEXT: vslide1down.vx v8, v8, a3 +; RV32VB-NEXT: vslide1down.vx v8, v8, a4 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-NEXT: ret ; @@ -1449,29 +1455,29 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV32VB-PACK-NEXT: lbu a7, 6(a0) ; RV32VB-PACK-NEXT: lbu t0, 7(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 -; RV32VB-PACK-NEXT: lbu a2, 8(a0) -; RV32VB-PACK-NEXT: lbu t1, 9(a0) -; RV32VB-PACK-NEXT: lbu t2, 10(a0) -; RV32VB-PACK-NEXT: lbu t3, 11(a0) -; RV32VB-PACK-NEXT: packh a3, a3, a4 -; RV32VB-PACK-NEXT: packh a4, a5, a6 -; RV32VB-PACK-NEXT: packh a5, a7, t0 +; RV32VB-PACK-NEXT: packh a2, a3, a4 +; RV32VB-PACK-NEXT: packh a3, a5, a6 +; RV32VB-PACK-NEXT: lbu a4, 8(a0) +; RV32VB-PACK-NEXT: lbu a5, 9(a0) +; RV32VB-PACK-NEXT: lbu a6, 10(a0) +; RV32VB-PACK-NEXT: lbu t1, 11(a0) +; RV32VB-PACK-NEXT: packh a7, a7, t0 +; RV32VB-PACK-NEXT: packh a4, a4, a5 +; RV32VB-PACK-NEXT: packh a5, a6, t1 ; RV32VB-PACK-NEXT: lbu a6, 12(a0) -; RV32VB-PACK-NEXT: lbu a7, 13(a0) -; RV32VB-PACK-NEXT: lbu t0, 14(a0) +; RV32VB-PACK-NEXT: lbu t0, 13(a0) +; RV32VB-PACK-NEXT: lbu t1, 14(a0) ; RV32VB-PACK-NEXT: lbu a0, 15(a0) -; RV32VB-PACK-NEXT: packh a2, a2, t1 -; RV32VB-PACK-NEXT: packh t1, t2, t3 -; RV32VB-PACK-NEXT: packh a6, a6, a7 -; RV32VB-PACK-NEXT: packh a0, t0, a0 -; RV32VB-PACK-NEXT: pack a1, a1, a3 +; RV32VB-PACK-NEXT: packh a6, a6, t0 +; RV32VB-PACK-NEXT: packh a0, t1, a0 +; RV32VB-PACK-NEXT: pack a1, a1, a2 +; RV32VB-PACK-NEXT: pack a2, a3, a7 ; RV32VB-PACK-NEXT: pack a3, a4, a5 -; RV32VB-PACK-NEXT: pack a2, a2, t1 ; RV32VB-PACK-NEXT: pack a0, a6, a0 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a1 -; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2 +; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: ret ; @@ -1493,15 +1499,13 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV64V-ONLY-NEXT: lbu t2, 9(a0) ; RV64V-ONLY-NEXT: lbu t3, 10(a0) ; RV64V-ONLY-NEXT: lbu t4, 11(a0) -; RV64V-ONLY-NEXT: li t5, 255 -; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64V-ONLY-NEXT: vmv.s.x v0, t5 ; RV64V-ONLY-NEXT: lbu t5, 12(a0) ; 
RV64V-ONLY-NEXT: lbu t6, 13(a0) ; RV64V-ONLY-NEXT: lbu s0, 14(a0) ; RV64V-ONLY-NEXT: lbu a0, 15(a0) -; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 +; RV64V-ONLY-NEXT: li a1, 255 ; RV64V-ONLY-NEXT: vmv.v.x v9, t1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t2 @@ -1517,6 +1521,9 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, s0 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, t0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a0 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64V-ONLY-NEXT: vmv.s.x v0, a1 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64V-ONLY-NEXT: .cfi_restore s0 @@ -1577,35 +1584,35 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_contigous: ; RVA22U64-PACK: # %bb.0: -; RVA22U64-PACK-NEXT: lbu a1, 0(a0) +; RVA22U64-PACK-NEXT: lbu a6, 0(a0) ; RVA22U64-PACK-NEXT: lbu a2, 1(a0) -; RVA22U64-PACK-NEXT: lbu a6, 2(a0) -; RVA22U64-PACK-NEXT: lbu a7, 3(a0) -; RVA22U64-PACK-NEXT: lbu t0, 4(a0) -; RVA22U64-PACK-NEXT: lbu a3, 5(a0) -; RVA22U64-PACK-NEXT: lbu a4, 6(a0) -; RVA22U64-PACK-NEXT: lbu a5, 7(a0) -; RVA22U64-PACK-NEXT: packh t1, a1, a2 -; RVA22U64-PACK-NEXT: lbu t2, 8(a0) -; RVA22U64-PACK-NEXT: lbu t3, 9(a0) -; RVA22U64-PACK-NEXT: lbu t4, 10(a0) +; RVA22U64-PACK-NEXT: lbu a3, 2(a0) +; RVA22U64-PACK-NEXT: lbu a4, 3(a0) +; RVA22U64-PACK-NEXT: lbu a5, 4(a0) +; RVA22U64-PACK-NEXT: lbu a1, 5(a0) +; RVA22U64-PACK-NEXT: lbu a7, 6(a0) +; RVA22U64-PACK-NEXT: lbu t0, 7(a0) +; RVA22U64-PACK-NEXT: packh a6, a6, a2 +; RVA22U64-PACK-NEXT: packh t2, a3, a4 +; RVA22U64-PACK-NEXT: packh t1, a5, a1 +; RVA22U64-PACK-NEXT: lbu a4, 8(a0) +; RVA22U64-PACK-NEXT: lbu a5, 9(a0) +; RVA22U64-PACK-NEXT: lbu a2, 10(a0) ; RVA22U64-PACK-NEXT: lbu a1, 11(a0) -; RVA22U64-PACK-NEXT: packh a6, a6, a7 -; RVA22U64-PACK-NEXT: packh a7, t0, a3 -; RVA22U64-PACK-NEXT: packh t0, a4, a5 -; RVA22U64-PACK-NEXT: lbu a5, 12(a0) -; RVA22U64-PACK-NEXT: lbu a3, 13(a0) -; RVA22U64-PACK-NEXT: lbu a2, 14(a0) +; RVA22U64-PACK-NEXT: packh a7, a7, t0 +; RVA22U64-PACK-NEXT: packh a4, a4, a5 +; RVA22U64-PACK-NEXT: packh a1, a2, a1 +; RVA22U64-PACK-NEXT: lbu a2, 12(a0) +; RVA22U64-PACK-NEXT: lbu a5, 13(a0) +; RVA22U64-PACK-NEXT: lbu a3, 14(a0) ; RVA22U64-PACK-NEXT: lbu a0, 15(a0) -; RVA22U64-PACK-NEXT: packh a4, t2, t3 -; RVA22U64-PACK-NEXT: packh a1, t4, a1 -; RVA22U64-PACK-NEXT: packh a3, a5, a3 -; RVA22U64-PACK-NEXT: packh a0, a2, a0 -; RVA22U64-PACK-NEXT: packw a2, t1, a6 -; RVA22U64-PACK-NEXT: packw a5, a7, t0 +; RVA22U64-PACK-NEXT: packh a2, a2, a5 +; RVA22U64-PACK-NEXT: packh a0, a3, a0 +; RVA22U64-PACK-NEXT: packw a3, a6, t2 +; RVA22U64-PACK-NEXT: packw a5, t1, a7 ; RVA22U64-PACK-NEXT: packw a1, a4, a1 -; RVA22U64-PACK-NEXT: packw a0, a3, a0 -; RVA22U64-PACK-NEXT: pack a2, a2, a5 +; RVA22U64-PACK-NEXT: packw a0, a2, a0 +; RVA22U64-PACK-NEXT: pack a2, a3, a5 ; RVA22U64-PACK-NEXT: pack a0, a1, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.v.x v8, a2 @@ -1630,15 +1637,13 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV64ZVE32-NEXT: lbu t2, 9(a0) ; RV64ZVE32-NEXT: lbu t3, 10(a0) ; RV64ZVE32-NEXT: lbu t4, 11(a0) -; RV64ZVE32-NEXT: li t5, 255 -; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; 
RV64ZVE32-NEXT: vmv.s.x v0, t5 ; RV64ZVE32-NEXT: lbu t5, 12(a0) ; RV64ZVE32-NEXT: lbu t6, 13(a0) ; RV64ZVE32-NEXT: lbu s0, 14(a0) ; RV64ZVE32-NEXT: lbu a0, 15(a0) -; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a1 +; RV64ZVE32-NEXT: li a1, 255 ; RV64ZVE32-NEXT: vmv.v.x v9, t1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t2 @@ -1654,6 +1659,9 @@ define <16 x i8> @buildvec_v16i8_loads_contigous(ptr %p) { ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, s0 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, t0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a1 +; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64ZVE32-NEXT: .cfi_restore s0 @@ -1732,15 +1740,13 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV32-ONLY-NEXT: lbu t2, 154(a0) ; RV32-ONLY-NEXT: lbu t3, 161(a0) ; RV32-ONLY-NEXT: lbu t4, 163(a0) -; RV32-ONLY-NEXT: li t5, 255 -; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-ONLY-NEXT: vmv.s.x v0, t5 ; RV32-ONLY-NEXT: lbu t5, 93(a0) ; RV32-ONLY-NEXT: lbu t6, 105(a0) ; RV32-ONLY-NEXT: lbu s0, 124(a0) ; RV32-ONLY-NEXT: lbu a0, 144(a0) -; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a1 +; RV32-ONLY-NEXT: li a1, 255 ; RV32-ONLY-NEXT: vmv.v.x v9, t1 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t5 @@ -1756,6 +1762,9 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a0 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, t0 ; RV32-ONLY-NEXT: vslide1down.vx v8, v9, t2 +; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-ONLY-NEXT: vmv.s.x v0, a1 +; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV32-ONLY-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-ONLY-NEXT: .cfi_restore s0 @@ -1777,38 +1786,38 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV32VB-NEXT: slli a2, a2, 8 ; RV32VB-NEXT: slli a3, a3, 16 ; RV32VB-NEXT: slli a4, a4, 24 +; RV32VB-NEXT: slli a7, a7, 8 ; RV32VB-NEXT: or a1, a1, a2 ; RV32VB-NEXT: or a3, a4, a3 -; RV32VB-NEXT: lbu a2, 93(a0) -; RV32VB-NEXT: lbu a4, 105(a0) -; RV32VB-NEXT: lbu t2, 124(a0) -; RV32VB-NEXT: lbu t3, 144(a0) -; RV32VB-NEXT: slli a7, a7, 8 +; RV32VB-NEXT: or a2, a6, a7 +; RV32VB-NEXT: lbu a4, 93(a0) +; RV32VB-NEXT: lbu a6, 105(a0) +; RV32VB-NEXT: lbu a7, 124(a0) +; RV32VB-NEXT: lbu t2, 144(a0) ; RV32VB-NEXT: slli a5, a5, 16 ; RV32VB-NEXT: slli t0, t0, 24 -; RV32VB-NEXT: slli a2, a2, 8 -; RV32VB-NEXT: or a6, a6, a7 +; RV32VB-NEXT: slli a4, a4, 8 ; RV32VB-NEXT: or a5, t0, a5 -; RV32VB-NEXT: lbu a7, 154(a0) -; RV32VB-NEXT: lbu t0, 161(a0) -; RV32VB-NEXT: or a2, t1, a2 +; RV32VB-NEXT: or a4, t1, a4 +; RV32VB-NEXT: lbu t0, 154(a0) +; RV32VB-NEXT: lbu t1, 161(a0) ; RV32VB-NEXT: lbu a0, 163(a0) -; RV32VB-NEXT: slli a4, a4, 16 -; RV32VB-NEXT: slli t0, t0, 24 -; RV32VB-NEXT: or a4, t0, a4 +; RV32VB-NEXT: slli a6, a6, 16 +; RV32VB-NEXT: slli t1, t1, 24 +; RV32VB-NEXT: or a6, t1, a6 ; RV32VB-NEXT: slli a0, a0, 8 -; RV32VB-NEXT: or a0, t2, a0 -; RV32VB-NEXT: slli t3, t3, 16 -; RV32VB-NEXT: slli a7, a7, 24 -; RV32VB-NEXT: or a7, a7, t3 +; RV32VB-NEXT: or a0, a7, a0 +; RV32VB-NEXT: slli t2, t2, 16 +; 
RV32VB-NEXT: slli t0, t0, 24 +; RV32VB-NEXT: or a7, t0, t2 ; RV32VB-NEXT: or a1, a1, a3 -; RV32VB-NEXT: or a3, a6, a5 -; RV32VB-NEXT: or a2, a2, a4 +; RV32VB-NEXT: or a2, a2, a5 +; RV32VB-NEXT: or a3, a4, a6 ; RV32VB-NEXT: or a0, a0, a7 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a1 -; RV32VB-NEXT: vslide1down.vx v8, v8, a3 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2 +; RV32VB-NEXT: vslide1down.vx v8, v8, a3 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-NEXT: ret ; @@ -1824,24 +1833,24 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV32VB-PACK-NEXT: lbu t0, 75(a0) ; RV32VB-PACK-NEXT: lbu t1, 82(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 -; RV32VB-PACK-NEXT: lbu a2, 154(a0) -; RV32VB-PACK-NEXT: lbu t2, 161(a0) -; RV32VB-PACK-NEXT: lbu t3, 163(a0) -; RV32VB-PACK-NEXT: packh a3, a3, a4 -; RV32VB-PACK-NEXT: packh a4, a6, a7 +; RV32VB-PACK-NEXT: packh a2, a3, a4 +; RV32VB-PACK-NEXT: packh a3, a6, a7 +; RV32VB-PACK-NEXT: lbu a4, 93(a0) +; RV32VB-PACK-NEXT: lbu a6, 105(a0) +; RV32VB-PACK-NEXT: lbu a7, 124(a0) +; RV32VB-PACK-NEXT: lbu t2, 144(a0) ; RV32VB-PACK-NEXT: packh a5, a5, t0 -; RV32VB-PACK-NEXT: lbu a6, 93(a0) -; RV32VB-PACK-NEXT: lbu a7, 105(a0) -; RV32VB-PACK-NEXT: lbu t0, 124(a0) -; RV32VB-PACK-NEXT: lbu a0, 144(a0) -; RV32VB-PACK-NEXT: packh a6, t1, a6 -; RV32VB-PACK-NEXT: packh a7, a7, t2 -; RV32VB-PACK-NEXT: packh t0, t0, t3 -; RV32VB-PACK-NEXT: packh a0, a0, a2 -; RV32VB-PACK-NEXT: pack a1, a1, a3 -; RV32VB-PACK-NEXT: pack a2, a4, a5 -; RV32VB-PACK-NEXT: pack a3, a6, a7 -; RV32VB-PACK-NEXT: pack a0, t0, a0 +; RV32VB-PACK-NEXT: packh a4, t1, a4 +; RV32VB-PACK-NEXT: lbu t0, 154(a0) +; RV32VB-PACK-NEXT: lbu t1, 161(a0) +; RV32VB-PACK-NEXT: lbu a0, 163(a0) +; RV32VB-PACK-NEXT: packh a6, a6, t1 +; RV32VB-PACK-NEXT: packh a0, a7, a0 +; RV32VB-PACK-NEXT: packh a7, t2, t0 +; RV32VB-PACK-NEXT: pack a1, a1, a2 +; RV32VB-PACK-NEXT: pack a2, a3, a5 +; RV32VB-PACK-NEXT: pack a3, a4, a6 +; RV32VB-PACK-NEXT: pack a0, a0, a7 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2 @@ -1867,15 +1876,13 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV64V-ONLY-NEXT: lbu t2, 154(a0) ; RV64V-ONLY-NEXT: lbu t3, 161(a0) ; RV64V-ONLY-NEXT: lbu t4, 163(a0) -; RV64V-ONLY-NEXT: li t5, 255 -; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64V-ONLY-NEXT: vmv.s.x v0, t5 ; RV64V-ONLY-NEXT: lbu t5, 93(a0) ; RV64V-ONLY-NEXT: lbu t6, 105(a0) ; RV64V-ONLY-NEXT: lbu s0, 124(a0) ; RV64V-ONLY-NEXT: lbu a0, 144(a0) -; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 +; RV64V-ONLY-NEXT: li a1, 255 ; RV64V-ONLY-NEXT: vmv.v.x v9, t1 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t5 @@ -1891,6 +1898,9 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a0 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, t0 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, t2 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64V-ONLY-NEXT: vmv.s.x v0, a1 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64V-ONLY-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64V-ONLY-NEXT: .cfi_restore s0 @@ -1900,98 +1910,90 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; ; RVA22U64-LABEL: buildvec_v16i8_loads_gather: ; RVA22U64: # %bb.0: -; RVA22U64-NEXT: lbu a1, 0(a0) 
+; RVA22U64-NEXT: lbu a7, 0(a0) ; RVA22U64-NEXT: lbu a2, 1(a0) ; RVA22U64-NEXT: lbu a3, 22(a0) ; RVA22U64-NEXT: lbu a4, 31(a0) ; RVA22U64-NEXT: lbu a6, 623(a0) -; RVA22U64-NEXT: lbu t0, 44(a0) -; RVA22U64-NEXT: lbu a7, 55(a0) -; RVA22U64-NEXT: lbu a5, 75(a0) +; RVA22U64-NEXT: lbu a5, 44(a0) +; RVA22U64-NEXT: lbu a1, 55(a0) +; RVA22U64-NEXT: lbu t0, 75(a0) ; RVA22U64-NEXT: lbu t1, 82(a0) ; RVA22U64-NEXT: slli a2, a2, 8 ; RVA22U64-NEXT: slli a3, a3, 16 ; RVA22U64-NEXT: slli a4, a4, 24 -; RVA22U64-NEXT: or t2, a1, a2 +; RVA22U64-NEXT: slli a5, a5, 32 +; RVA22U64-NEXT: slli a1, a1, 40 +; RVA22U64-NEXT: or a7, a7, a2 ; RVA22U64-NEXT: or t3, a4, a3 -; RVA22U64-NEXT: lbu a2, 93(a0) +; RVA22U64-NEXT: or t2, a1, a5 +; RVA22U64-NEXT: lbu a4, 93(a0) ; RVA22U64-NEXT: lbu t4, 105(a0) -; RVA22U64-NEXT: lbu t6, 124(a0) +; RVA22U64-NEXT: lbu a2, 124(a0) ; RVA22U64-NEXT: lbu t5, 144(a0) -; RVA22U64-NEXT: slli t0, t0, 32 -; RVA22U64-NEXT: slli a7, a7, 40 ; RVA22U64-NEXT: slli a6, a6, 48 -; RVA22U64-NEXT: slli a5, a5, 56 -; RVA22U64-NEXT: slli a2, a2, 8 -; RVA22U64-NEXT: or a7, a7, t0 -; RVA22U64-NEXT: or a5, a5, a6 -; RVA22U64-NEXT: lbu a3, 154(a0) +; RVA22U64-NEXT: slli t0, t0, 56 +; RVA22U64-NEXT: slli a4, a4, 8 +; RVA22U64-NEXT: or a3, t0, a6 +; RVA22U64-NEXT: or a4, t1, a4 +; RVA22U64-NEXT: lbu a5, 154(a0) ; RVA22U64-NEXT: lbu a1, 161(a0) -; RVA22U64-NEXT: or a2, t1, a2 ; RVA22U64-NEXT: lbu a0, 163(a0) ; RVA22U64-NEXT: slli t4, t4, 16 ; RVA22U64-NEXT: slli a1, a1, 24 ; RVA22U64-NEXT: or a1, a1, t4 -; RVA22U64-NEXT: slli t6, t6, 32 +; RVA22U64-NEXT: slli a2, a2, 32 ; RVA22U64-NEXT: slli a0, a0, 40 -; RVA22U64-NEXT: or a0, a0, t6 +; RVA22U64-NEXT: or a0, a0, a2 ; RVA22U64-NEXT: slli t5, t5, 48 -; RVA22U64-NEXT: slli a3, a3, 56 -; RVA22U64-NEXT: or a3, a3, t5 -; RVA22U64-NEXT: or a4, t2, t3 -; RVA22U64-NEXT: or a5, a5, a7 -; RVA22U64-NEXT: or a1, a1, a2 -; RVA22U64-NEXT: or a0, a0, a3 -; RVA22U64-NEXT: or a4, a4, a5 +; RVA22U64-NEXT: slli a5, a5, 56 +; RVA22U64-NEXT: or a2, a5, t5 +; RVA22U64-NEXT: or a5, a7, t3 +; RVA22U64-NEXT: or a3, a3, t2 +; RVA22U64-NEXT: or a1, a1, a4 +; RVA22U64-NEXT: or a0, a0, a2 +; RVA22U64-NEXT: or a3, a3, a5 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RVA22U64-NEXT: vmv.v.x v8, a4 +; RVA22U64-NEXT: vmv.v.x v8, a3 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-NEXT: ret ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_loads_gather: ; RVA22U64-PACK: # %bb.0: -; RVA22U64-PACK-NEXT: addi sp, sp, -16 -; RVA22U64-PACK-NEXT: .cfi_def_cfa_offset 16 -; RVA22U64-PACK-NEXT: sd s0, 8(sp) # 8-byte Folded Spill -; RVA22U64-PACK-NEXT: .cfi_offset s0, -8 -; RVA22U64-PACK-NEXT: lbu a1, 0(a0) -; RVA22U64-PACK-NEXT: lbu a2, 1(a0) -; RVA22U64-PACK-NEXT: lbu a6, 22(a0) -; RVA22U64-PACK-NEXT: lbu a7, 31(a0) -; RVA22U64-PACK-NEXT: lbu t0, 623(a0) -; RVA22U64-PACK-NEXT: lbu t3, 44(a0) -; RVA22U64-PACK-NEXT: lbu t4, 55(a0) -; RVA22U64-PACK-NEXT: lbu t5, 75(a0) -; RVA22U64-PACK-NEXT: lbu t1, 82(a0) -; RVA22U64-PACK-NEXT: packh t2, a1, a2 -; RVA22U64-PACK-NEXT: lbu t6, 154(a0) -; RVA22U64-PACK-NEXT: lbu s0, 161(a0) -; RVA22U64-PACK-NEXT: lbu a3, 163(a0) -; RVA22U64-PACK-NEXT: packh a6, a6, a7 -; RVA22U64-PACK-NEXT: packh a7, t3, t4 -; RVA22U64-PACK-NEXT: packh a2, t0, t5 +; RVA22U64-PACK-NEXT: lbu a7, 0(a0) +; RVA22U64-PACK-NEXT: lbu t1, 1(a0) +; RVA22U64-PACK-NEXT: lbu a3, 22(a0) +; RVA22U64-PACK-NEXT: lbu a4, 31(a0) +; RVA22U64-PACK-NEXT: lbu a6, 623(a0) +; RVA22U64-PACK-NEXT: lbu a5, 44(a0) +; RVA22U64-PACK-NEXT: lbu a1, 55(a0) +; RVA22U64-PACK-NEXT: lbu t0, 
75(a0) +; RVA22U64-PACK-NEXT: lbu t3, 82(a0) +; RVA22U64-PACK-NEXT: packh a7, a7, t1 +; RVA22U64-PACK-NEXT: packh t2, a3, a4 +; RVA22U64-PACK-NEXT: packh t1, a5, a1 ; RVA22U64-PACK-NEXT: lbu a4, 93(a0) -; RVA22U64-PACK-NEXT: lbu a5, 105(a0) -; RVA22U64-PACK-NEXT: lbu a1, 124(a0) -; RVA22U64-PACK-NEXT: lbu a0, 144(a0) -; RVA22U64-PACK-NEXT: packh a4, t1, a4 -; RVA22U64-PACK-NEXT: packh a5, a5, s0 -; RVA22U64-PACK-NEXT: packh a1, a1, a3 -; RVA22U64-PACK-NEXT: packh a0, a0, t6 -; RVA22U64-PACK-NEXT: packw a3, t2, a6 -; RVA22U64-PACK-NEXT: packw a2, a7, a2 -; RVA22U64-PACK-NEXT: packw a4, a4, a5 -; RVA22U64-PACK-NEXT: packw a0, a1, a0 -; RVA22U64-PACK-NEXT: pack a1, a3, a2 -; RVA22U64-PACK-NEXT: pack a0, a4, a0 +; RVA22U64-PACK-NEXT: lbu t4, 105(a0) +; RVA22U64-PACK-NEXT: lbu t5, 124(a0) +; RVA22U64-PACK-NEXT: lbu a3, 144(a0) +; RVA22U64-PACK-NEXT: packh a2, a6, t0 +; RVA22U64-PACK-NEXT: packh a4, t3, a4 +; RVA22U64-PACK-NEXT: lbu a5, 154(a0) +; RVA22U64-PACK-NEXT: lbu a1, 161(a0) +; RVA22U64-PACK-NEXT: lbu a0, 163(a0) +; RVA22U64-PACK-NEXT: packh a1, t4, a1 +; RVA22U64-PACK-NEXT: packh a0, t5, a0 +; RVA22U64-PACK-NEXT: packh a3, a3, a5 +; RVA22U64-PACK-NEXT: packw a5, a7, t2 +; RVA22U64-PACK-NEXT: packw a2, t1, a2 +; RVA22U64-PACK-NEXT: packw a1, a4, a1 +; RVA22U64-PACK-NEXT: packw a0, a0, a3 +; RVA22U64-PACK-NEXT: pack a2, a5, a2 +; RVA22U64-PACK-NEXT: pack a0, a1, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RVA22U64-PACK-NEXT: vmv.v.x v8, a1 +; RVA22U64-PACK-NEXT: vmv.v.x v8, a2 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 -; RVA22U64-PACK-NEXT: ld s0, 8(sp) # 8-byte Folded Reload -; RVA22U64-PACK-NEXT: .cfi_restore s0 -; RVA22U64-PACK-NEXT: addi sp, sp, 16 -; RVA22U64-PACK-NEXT: .cfi_def_cfa_offset 0 ; RVA22U64-PACK-NEXT: ret ; ; RV64ZVE32-LABEL: buildvec_v16i8_loads_gather: @@ -2012,15 +2014,13 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV64ZVE32-NEXT: lbu t2, 154(a0) ; RV64ZVE32-NEXT: lbu t3, 161(a0) ; RV64ZVE32-NEXT: lbu t4, 163(a0) -; RV64ZVE32-NEXT: li t5, 255 -; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32-NEXT: vmv.s.x v0, t5 ; RV64ZVE32-NEXT: lbu t5, 93(a0) ; RV64ZVE32-NEXT: lbu t6, 105(a0) ; RV64ZVE32-NEXT: lbu s0, 124(a0) ; RV64ZVE32-NEXT: lbu a0, 144(a0) -; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a1 +; RV64ZVE32-NEXT: li a1, 255 ; RV64ZVE32-NEXT: vmv.v.x v9, t1 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t5 @@ -2036,6 +2036,9 @@ define <16 x i8> @buildvec_v16i8_loads_gather(ptr %p) { ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a0 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, t0 ; RV64ZVE32-NEXT: vslide1down.vx v8, v9, t2 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a1 +; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64ZVE32-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; RV64ZVE32-NEXT: .cfi_restore s0 @@ -2118,28 +2121,28 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; ; RV32VB-LABEL: buildvec_v16i8_undef_low_half: ; RV32VB: # %bb.0: -; RV32VB-NEXT: lbu a1, 93(a0) -; RV32VB-NEXT: lbu a2, 82(a0) +; RV32VB-NEXT: lbu a1, 82(a0) +; RV32VB-NEXT: lbu a2, 93(a0) ; RV32VB-NEXT: lbu a3, 105(a0) ; RV32VB-NEXT: lbu a4, 124(a0) -; RV32VB-NEXT: slli a1, a1, 8 -; RV32VB-NEXT: lbu a5, 144(a0) -; RV32VB-NEXT: lbu a6, 154(a0) -; RV32VB-NEXT: lbu a7, 161(a0) -; RV32VB-NEXT: or a1, a2, a1 +; RV32VB-NEXT: 
slli a2, a2, 8 +; RV32VB-NEXT: or a1, a1, a2 +; RV32VB-NEXT: lbu a2, 144(a0) +; RV32VB-NEXT: lbu a5, 154(a0) +; RV32VB-NEXT: lbu a6, 161(a0) ; RV32VB-NEXT: lbu a0, 163(a0) ; RV32VB-NEXT: slli a3, a3, 16 -; RV32VB-NEXT: slli a7, a7, 24 -; RV32VB-NEXT: or a2, a7, a3 +; RV32VB-NEXT: slli a6, a6, 24 +; RV32VB-NEXT: or a3, a6, a3 ; RV32VB-NEXT: slli a0, a0, 8 ; RV32VB-NEXT: or a0, a4, a0 -; RV32VB-NEXT: slli a5, a5, 16 -; RV32VB-NEXT: slli a6, a6, 24 -; RV32VB-NEXT: or a3, a6, a5 +; RV32VB-NEXT: slli a2, a2, 16 +; RV32VB-NEXT: slli a5, a5, 24 +; RV32VB-NEXT: or a2, a5, a2 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.i v8, 0 -; RV32VB-NEXT: or a1, a1, a2 -; RV32VB-NEXT: or a0, a0, a3 +; RV32VB-NEXT: or a1, a1, a3 +; RV32VB-NEXT: or a0, a0, a2 ; RV32VB-NEXT: vslide1down.vx v8, v8, zero ; RV32VB-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 @@ -2151,21 +2154,21 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; RV32VB-PACK-NEXT: lbu a2, 93(a0) ; RV32VB-PACK-NEXT: lbu a3, 105(a0) ; RV32VB-PACK-NEXT: lbu a4, 124(a0) -; RV32VB-PACK-NEXT: lbu a5, 161(a0) -; RV32VB-PACK-NEXT: lbu a6, 163(a0) -; RV32VB-PACK-NEXT: lbu a7, 144(a0) -; RV32VB-PACK-NEXT: lbu a0, 154(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 -; RV32VB-PACK-NEXT: packh a2, a3, a5 -; RV32VB-PACK-NEXT: packh a3, a4, a6 -; RV32VB-PACK-NEXT: packh a0, a7, a0 -; RV32VB-PACK-NEXT: pack a1, a1, a2 -; RV32VB-PACK-NEXT: packh a2, a0, a0 -; RV32VB-PACK-NEXT: pack a2, a2, a2 +; RV32VB-PACK-NEXT: lbu a2, 144(a0) +; RV32VB-PACK-NEXT: lbu a5, 154(a0) +; RV32VB-PACK-NEXT: lbu a6, 161(a0) +; RV32VB-PACK-NEXT: lbu a0, 163(a0) +; RV32VB-PACK-NEXT: packh a3, a3, a6 +; RV32VB-PACK-NEXT: packh a0, a4, a0 +; RV32VB-PACK-NEXT: packh a2, a2, a5 +; RV32VB-PACK-NEXT: pack a1, a1, a3 +; RV32VB-PACK-NEXT: packh a3, a0, a0 +; RV32VB-PACK-NEXT: pack a3, a3, a3 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32VB-PACK-NEXT: vmv.v.x v8, a2 -; RV32VB-PACK-NEXT: pack a0, a3, a0 -; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2 +; RV32VB-PACK-NEXT: vmv.v.x v8, a3 +; RV32VB-PACK-NEXT: pack a0, a0, a2 +; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: ret @@ -2193,26 +2196,26 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; ; RVA22U64-LABEL: buildvec_v16i8_undef_low_half: ; RVA22U64: # %bb.0: -; RVA22U64-NEXT: lbu a1, 93(a0) -; RVA22U64-NEXT: lbu a6, 82(a0) -; RVA22U64-NEXT: lbu a7, 105(a0) +; RVA22U64-NEXT: lbu a1, 82(a0) +; RVA22U64-NEXT: lbu a2, 93(a0) +; RVA22U64-NEXT: lbu a3, 105(a0) ; RVA22U64-NEXT: lbu a4, 124(a0) -; RVA22U64-NEXT: slli a1, a1, 8 -; RVA22U64-NEXT: lbu a5, 144(a0) -; RVA22U64-NEXT: lbu a2, 154(a0) -; RVA22U64-NEXT: lbu a3, 161(a0) -; RVA22U64-NEXT: or a1, a6, a1 +; RVA22U64-NEXT: slli a2, a2, 8 +; RVA22U64-NEXT: or a6, a1, a2 +; RVA22U64-NEXT: lbu a2, 144(a0) +; RVA22U64-NEXT: lbu a5, 154(a0) +; RVA22U64-NEXT: lbu a1, 161(a0) ; RVA22U64-NEXT: lbu a0, 163(a0) -; RVA22U64-NEXT: slli a7, a7, 16 -; RVA22U64-NEXT: slli a3, a3, 24 -; RVA22U64-NEXT: or a3, a3, a7 +; RVA22U64-NEXT: slli a3, a3, 16 +; RVA22U64-NEXT: slli a1, a1, 24 +; RVA22U64-NEXT: or a1, a1, a3 ; RVA22U64-NEXT: slli a4, a4, 32 ; RVA22U64-NEXT: slli a0, a0, 40 ; RVA22U64-NEXT: or a0, a0, a4 -; RVA22U64-NEXT: slli a5, a5, 48 -; RVA22U64-NEXT: slli a2, a2, 56 +; RVA22U64-NEXT: slli a2, a2, 48 +; RVA22U64-NEXT: slli a5, a5, 56 ; RVA22U64-NEXT: or a2, a2, a5 -; RVA22U64-NEXT: or a1, a1, a3 +; RVA22U64-NEXT: or 
a1, a6, a1 ; RVA22U64-NEXT: or a0, a0, a2 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -2222,24 +2225,24 @@ define <16 x i8> @buildvec_v16i8_undef_low_half(ptr %p) { ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_low_half: ; RVA22U64-PACK: # %bb.0: -; RVA22U64-PACK-NEXT: lbu a6, 82(a0) -; RVA22U64-PACK-NEXT: lbu a7, 93(a0) -; RVA22U64-PACK-NEXT: lbu t0, 105(a0) +; RVA22U64-PACK-NEXT: lbu a1, 82(a0) +; RVA22U64-PACK-NEXT: lbu a2, 93(a0) +; RVA22U64-PACK-NEXT: lbu a6, 105(a0) ; RVA22U64-PACK-NEXT: lbu a4, 124(a0) -; RVA22U64-PACK-NEXT: lbu a5, 161(a0) -; RVA22U64-PACK-NEXT: lbu a1, 163(a0) +; RVA22U64-PACK-NEXT: packh a1, a1, a2 ; RVA22U64-PACK-NEXT: lbu a2, 144(a0) -; RVA22U64-PACK-NEXT: lbu a0, 154(a0) -; RVA22U64-PACK-NEXT: packh a3, a6, a7 -; RVA22U64-PACK-NEXT: packh a5, t0, a5 -; RVA22U64-PACK-NEXT: packh a1, a4, a1 -; RVA22U64-PACK-NEXT: packh a0, a2, a0 -; RVA22U64-PACK-NEXT: packw a2, a3, a5 +; RVA22U64-PACK-NEXT: lbu a5, 154(a0) +; RVA22U64-PACK-NEXT: lbu a3, 161(a0) +; RVA22U64-PACK-NEXT: lbu a0, 163(a0) +; RVA22U64-PACK-NEXT: packh a3, a6, a3 +; RVA22U64-PACK-NEXT: packh a0, a4, a0 +; RVA22U64-PACK-NEXT: packh a2, a2, a5 +; RVA22U64-PACK-NEXT: packw a1, a1, a3 ; RVA22U64-PACK-NEXT: packh a3, a0, a0 ; RVA22U64-PACK-NEXT: packw a3, a3, a3 ; RVA22U64-PACK-NEXT: pack a3, a3, a3 -; RVA22U64-PACK-NEXT: packw a0, a1, a0 -; RVA22U64-PACK-NEXT: pack a0, a2, a0 +; RVA22U64-PACK-NEXT: packw a0, a0, a2 +; RVA22U64-PACK-NEXT: pack a0, a1, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.v.x v8, a3 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 @@ -2319,26 +2322,26 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; ; RV32VB-LABEL: buildvec_v16i8_undef_high_half: ; RV32VB: # %bb.0: -; RV32VB-NEXT: lbu a1, 1(a0) -; RV32VB-NEXT: lbu a2, 22(a0) -; RV32VB-NEXT: lbu a3, 31(a0) -; RV32VB-NEXT: lbu a4, 0(a0) -; RV32VB-NEXT: slli a1, a1, 8 -; RV32VB-NEXT: slli a2, a2, 16 -; RV32VB-NEXT: slli a3, a3, 24 -; RV32VB-NEXT: or a1, a4, a1 -; RV32VB-NEXT: lbu a4, 44(a0) -; RV32VB-NEXT: lbu a5, 55(a0) -; RV32VB-NEXT: or a2, a3, a2 -; RV32VB-NEXT: lbu a3, 623(a0) -; RV32VB-NEXT: lbu a0, 75(a0) -; RV32VB-NEXT: slli a5, a5, 8 -; RV32VB-NEXT: or a4, a4, a5 +; RV32VB-NEXT: lbu a1, 0(a0) +; RV32VB-NEXT: lbu a2, 1(a0) +; RV32VB-NEXT: lbu a3, 22(a0) +; RV32VB-NEXT: lbu a4, 31(a0) +; RV32VB-NEXT: slli a2, a2, 8 ; RV32VB-NEXT: slli a3, a3, 16 -; RV32VB-NEXT: slli a0, a0, 24 -; RV32VB-NEXT: or a0, a0, a3 +; RV32VB-NEXT: slli a4, a4, 24 ; RV32VB-NEXT: or a1, a1, a2 -; RV32VB-NEXT: or a0, a4, a0 +; RV32VB-NEXT: or a3, a4, a3 +; RV32VB-NEXT: lbu a2, 44(a0) +; RV32VB-NEXT: lbu a4, 55(a0) +; RV32VB-NEXT: lbu a5, 75(a0) +; RV32VB-NEXT: slli a4, a4, 8 +; RV32VB-NEXT: or a2, a2, a4 +; RV32VB-NEXT: lbu a0, 623(a0) +; RV32VB-NEXT: slli a0, a0, 16 +; RV32VB-NEXT: slli a5, a5, 24 +; RV32VB-NEXT: or a0, a5, a0 +; RV32VB-NEXT: or a1, a1, a3 +; RV32VB-NEXT: or a0, a2, a0 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 @@ -2352,14 +2355,14 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; RV32VB-PACK-NEXT: lbu a2, 1(a0) ; RV32VB-PACK-NEXT: lbu a3, 22(a0) ; RV32VB-PACK-NEXT: lbu a4, 31(a0) -; RV32VB-PACK-NEXT: lbu a5, 623(a0) -; RV32VB-PACK-NEXT: lbu a6, 44(a0) -; RV32VB-PACK-NEXT: lbu a7, 55(a0) -; RV32VB-PACK-NEXT: lbu a0, 75(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 ; RV32VB-PACK-NEXT: packh a2, a3, a4 -; RV32VB-PACK-NEXT: packh a3, a6, a7 -; RV32VB-PACK-NEXT: 
packh a0, a5, a0 +; RV32VB-PACK-NEXT: lbu a3, 44(a0) +; RV32VB-PACK-NEXT: lbu a4, 55(a0) +; RV32VB-PACK-NEXT: lbu a5, 75(a0) +; RV32VB-PACK-NEXT: packh a3, a3, a4 +; RV32VB-PACK-NEXT: lbu a0, 623(a0) +; RV32VB-PACK-NEXT: packh a0, a0, a5 ; RV32VB-PACK-NEXT: pack a1, a1, a2 ; RV32VB-PACK-NEXT: packh a2, a0, a0 ; RV32VB-PACK-NEXT: pack a0, a3, a0 @@ -2395,27 +2398,27 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; ; RVA22U64-LABEL: buildvec_v16i8_undef_high_half: ; RVA22U64: # %bb.0: -; RVA22U64-NEXT: lbu a1, 1(a0) -; RVA22U64-NEXT: lbu a2, 22(a0) -; RVA22U64-NEXT: lbu a3, 31(a0) -; RVA22U64-NEXT: lbu a4, 0(a0) -; RVA22U64-NEXT: slli a1, a1, 8 -; RVA22U64-NEXT: slli a2, a2, 16 -; RVA22U64-NEXT: slli a3, a3, 24 -; RVA22U64-NEXT: or a1, a1, a4 -; RVA22U64-NEXT: or a2, a2, a3 -; RVA22U64-NEXT: lbu a3, 44(a0) +; RVA22U64-NEXT: lbu a1, 0(a0) +; RVA22U64-NEXT: lbu a2, 1(a0) +; RVA22U64-NEXT: lbu a3, 22(a0) +; RVA22U64-NEXT: lbu a4, 31(a0) +; RVA22U64-NEXT: slli a2, a2, 8 +; RVA22U64-NEXT: slli a3, a3, 16 +; RVA22U64-NEXT: slli a4, a4, 24 +; RVA22U64-NEXT: or a1, a1, a2 +; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: lbu a2, 44(a0) ; RVA22U64-NEXT: lbu a4, 55(a0) -; RVA22U64-NEXT: lbu a5, 623(a0) -; RVA22U64-NEXT: lbu a0, 75(a0) -; RVA22U64-NEXT: slli a3, a3, 32 +; RVA22U64-NEXT: lbu a5, 75(a0) +; RVA22U64-NEXT: slli a2, a2, 32 ; RVA22U64-NEXT: slli a4, a4, 40 -; RVA22U64-NEXT: or a3, a3, a4 -; RVA22U64-NEXT: slli a5, a5, 48 -; RVA22U64-NEXT: slli a0, a0, 56 +; RVA22U64-NEXT: or a2, a2, a4 +; RVA22U64-NEXT: lbu a0, 623(a0) +; RVA22U64-NEXT: slli a0, a0, 48 +; RVA22U64-NEXT: slli a5, a5, 56 ; RVA22U64-NEXT: or a0, a0, a5 -; RVA22U64-NEXT: or a1, a1, a2 -; RVA22U64-NEXT: or a0, a0, a3 +; RVA22U64-NEXT: or a1, a1, a3 +; RVA22U64-NEXT: or a0, a0, a2 ; RVA22U64-NEXT: or a0, a0, a1 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-NEXT: vmv.v.x v8, a0 @@ -2424,26 +2427,26 @@ define <16 x i8> @buildvec_v16i8_undef_high_half(ptr %p) { ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_high_half: ; RVA22U64-PACK: # %bb.0: -; RVA22U64-PACK-NEXT: lbu a6, 0(a0) -; RVA22U64-PACK-NEXT: lbu a7, 1(a0) -; RVA22U64-PACK-NEXT: lbu t0, 22(a0) +; RVA22U64-PACK-NEXT: lbu a1, 0(a0) +; RVA22U64-PACK-NEXT: lbu a2, 1(a0) +; RVA22U64-PACK-NEXT: lbu a3, 22(a0) ; RVA22U64-PACK-NEXT: lbu a4, 31(a0) -; RVA22U64-PACK-NEXT: lbu a5, 623(a0) -; RVA22U64-PACK-NEXT: lbu a1, 44(a0) -; RVA22U64-PACK-NEXT: lbu a2, 55(a0) -; RVA22U64-PACK-NEXT: lbu a0, 75(a0) -; RVA22U64-PACK-NEXT: packh a3, a6, a7 -; RVA22U64-PACK-NEXT: packh a4, t0, a4 ; RVA22U64-PACK-NEXT: packh a1, a1, a2 -; RVA22U64-PACK-NEXT: packh a0, a5, a0 -; RVA22U64-PACK-NEXT: packw a2, a3, a4 -; RVA22U64-PACK-NEXT: packh a3, a0, a0 -; RVA22U64-PACK-NEXT: packw a3, a3, a3 -; RVA22U64-PACK-NEXT: packw a0, a1, a0 -; RVA22U64-PACK-NEXT: pack a0, a2, a0 +; RVA22U64-PACK-NEXT: packh a2, a3, a4 +; RVA22U64-PACK-NEXT: lbu a3, 44(a0) +; RVA22U64-PACK-NEXT: lbu a4, 55(a0) +; RVA22U64-PACK-NEXT: lbu a5, 75(a0) +; RVA22U64-PACK-NEXT: packh a3, a3, a4 +; RVA22U64-PACK-NEXT: lbu a0, 623(a0) +; RVA22U64-PACK-NEXT: packh a0, a0, a5 +; RVA22U64-PACK-NEXT: packw a1, a1, a2 +; RVA22U64-PACK-NEXT: packh a2, a0, a0 +; RVA22U64-PACK-NEXT: packw a2, a2, a2 +; RVA22U64-PACK-NEXT: packw a0, a3, a0 +; RVA22U64-PACK-NEXT: pack a0, a1, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.v.x v8, a0 -; RVA22U64-PACK-NEXT: pack a0, a3, a3 +; RVA22U64-PACK-NEXT: pack a0, a2, a2 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-PACK-NEXT: ret 
; @@ -2504,15 +2507,13 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV32-ONLY-NEXT: lbu a3, 44(a0) ; RV32-ONLY-NEXT: lbu a4, 55(a0) ; RV32-ONLY-NEXT: lbu a5, 75(a0) -; RV32-ONLY-NEXT: li a6, 255 -; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-ONLY-NEXT: vmv.s.x v0, a6 ; RV32-ONLY-NEXT: lbu a6, 82(a0) ; RV32-ONLY-NEXT: lbu a7, 93(a0) ; RV32-ONLY-NEXT: lbu t0, 105(a0) ; RV32-ONLY-NEXT: lbu a0, 161(a0) -; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a2 +; RV32-ONLY-NEXT: li a2, 255 ; RV32-ONLY-NEXT: vmv.v.x v9, a6 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a7 @@ -2522,35 +2523,38 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a0 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a5 ; RV32-ONLY-NEXT: vslidedown.vi v8, v9, 4 +; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-ONLY-NEXT: vmv.s.x v0, a2 +; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV32-ONLY-NEXT: ret ; ; RV32VB-LABEL: buildvec_v16i8_undef_edges: ; RV32VB: # %bb.0: ; RV32VB-NEXT: lbu a1, 623(a0) -; RV32VB-NEXT: lbu a2, 55(a0) -; RV32VB-NEXT: lbu a3, 75(a0) -; RV32VB-NEXT: lbu a4, 31(a0) -; RV32VB-NEXT: lbu a5, 44(a0) -; RV32VB-NEXT: slli a2, a2, 8 +; RV32VB-NEXT: lbu a2, 31(a0) +; RV32VB-NEXT: lbu a3, 44(a0) +; RV32VB-NEXT: lbu a4, 55(a0) +; RV32VB-NEXT: lbu a5, 75(a0) +; RV32VB-NEXT: slli a4, a4, 8 ; RV32VB-NEXT: slli a1, a1, 16 -; RV32VB-NEXT: slli a3, a3, 24 -; RV32VB-NEXT: or a2, a5, a2 -; RV32VB-NEXT: lbu a5, 82(a0) -; RV32VB-NEXT: lbu a6, 93(a0) -; RV32VB-NEXT: or a1, a3, a1 -; RV32VB-NEXT: lbu a3, 105(a0) +; RV32VB-NEXT: slli a5, a5, 24 +; RV32VB-NEXT: or a3, a3, a4 +; RV32VB-NEXT: or a1, a5, a1 +; RV32VB-NEXT: lbu a4, 82(a0) +; RV32VB-NEXT: lbu a5, 93(a0) +; RV32VB-NEXT: lbu a6, 105(a0) ; RV32VB-NEXT: lbu a0, 161(a0) -; RV32VB-NEXT: slli a6, a6, 8 -; RV32VB-NEXT: or a5, a5, a6 -; RV32VB-NEXT: slli a3, a3, 16 +; RV32VB-NEXT: slli a5, a5, 8 +; RV32VB-NEXT: or a4, a4, a5 +; RV32VB-NEXT: slli a6, a6, 16 ; RV32VB-NEXT: slli a0, a0, 24 -; RV32VB-NEXT: or a0, a0, a3 -; RV32VB-NEXT: slli a4, a4, 24 -; RV32VB-NEXT: or a1, a2, a1 -; RV32VB-NEXT: or a0, a5, a0 +; RV32VB-NEXT: or a0, a0, a6 +; RV32VB-NEXT: slli a2, a2, 24 +; RV32VB-NEXT: or a1, a3, a1 +; RV32VB-NEXT: or a0, a4, a0 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32VB-NEXT: vmv.v.x v8, a4 +; RV32VB-NEXT: vmv.v.x v8, a2 ; RV32VB-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-NEXT: vslide1down.vx v8, v8, zero @@ -2563,14 +2567,14 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV32VB-PACK-NEXT: lbu a3, 44(a0) ; RV32VB-PACK-NEXT: lbu a4, 55(a0) ; RV32VB-PACK-NEXT: lbu a5, 75(a0) -; RV32VB-PACK-NEXT: lbu a6, 82(a0) -; RV32VB-PACK-NEXT: lbu a7, 93(a0) -; RV32VB-PACK-NEXT: lbu t0, 105(a0) -; RV32VB-PACK-NEXT: lbu a0, 161(a0) ; RV32VB-PACK-NEXT: packh a3, a3, a4 ; RV32VB-PACK-NEXT: packh a1, a1, a5 -; RV32VB-PACK-NEXT: packh a4, a6, a7 -; RV32VB-PACK-NEXT: packh a0, t0, a0 +; RV32VB-PACK-NEXT: lbu a4, 82(a0) +; RV32VB-PACK-NEXT: lbu a5, 93(a0) +; RV32VB-PACK-NEXT: lbu a6, 105(a0) +; RV32VB-PACK-NEXT: lbu a0, 161(a0) +; RV32VB-PACK-NEXT: packh a4, a4, a5 +; RV32VB-PACK-NEXT: packh a0, a6, a0 ; RV32VB-PACK-NEXT: packh a5, a0, a0 ; RV32VB-PACK-NEXT: packh a2, a0, a2 ; RV32VB-PACK-NEXT: pack a2, a5, a2 @@ -2591,15 +2595,13 @@ define <16 x i8> 
@buildvec_v16i8_undef_edges(ptr %p) { ; RV64V-ONLY-NEXT: lbu a3, 44(a0) ; RV64V-ONLY-NEXT: lbu a4, 55(a0) ; RV64V-ONLY-NEXT: lbu a5, 75(a0) -; RV64V-ONLY-NEXT: li a6, 255 -; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64V-ONLY-NEXT: vmv.s.x v0, a6 ; RV64V-ONLY-NEXT: lbu a6, 82(a0) ; RV64V-ONLY-NEXT: lbu a7, 93(a0) ; RV64V-ONLY-NEXT: lbu t0, 105(a0) ; RV64V-ONLY-NEXT: lbu a0, 161(a0) -; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a2 +; RV64V-ONLY-NEXT: li a2, 255 ; RV64V-ONLY-NEXT: vmv.v.x v9, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a7 @@ -2609,65 +2611,68 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a0 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a5 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v9, 4 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64V-ONLY-NEXT: vmv.s.x v0, a2 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v16i8_undef_edges: ; RVA22U64: # %bb.0: +; RVA22U64-NEXT: lbu a1, 623(a0) ; RVA22U64-NEXT: lbu a6, 31(a0) -; RVA22U64-NEXT: lbu a2, 44(a0) -; RVA22U64-NEXT: lbu a3, 55(a0) -; RVA22U64-NEXT: lbu a4, 623(a0) +; RVA22U64-NEXT: lbu a3, 44(a0) +; RVA22U64-NEXT: lbu a4, 55(a0) ; RVA22U64-NEXT: lbu a5, 75(a0) -; RVA22U64-NEXT: slli a2, a2, 32 -; RVA22U64-NEXT: slli a3, a3, 40 -; RVA22U64-NEXT: slli a4, a4, 48 +; RVA22U64-NEXT: slli a3, a3, 32 +; RVA22U64-NEXT: slli a4, a4, 40 +; RVA22U64-NEXT: slli a1, a1, 48 ; RVA22U64-NEXT: slli a5, a5, 56 -; RVA22U64-NEXT: or a2, a2, a3 -; RVA22U64-NEXT: lbu a3, 82(a0) -; RVA22U64-NEXT: lbu a1, 93(a0) -; RVA22U64-NEXT: or a4, a4, a5 -; RVA22U64-NEXT: lbu a5, 105(a0) +; RVA22U64-NEXT: or a3, a3, a4 +; RVA22U64-NEXT: or a1, a1, a5 +; RVA22U64-NEXT: lbu a4, 82(a0) +; RVA22U64-NEXT: lbu a5, 93(a0) +; RVA22U64-NEXT: lbu a2, 105(a0) ; RVA22U64-NEXT: lbu a0, 161(a0) -; RVA22U64-NEXT: slli a1, a1, 8 -; RVA22U64-NEXT: or a1, a1, a3 -; RVA22U64-NEXT: slli a5, a5, 16 +; RVA22U64-NEXT: slli a5, a5, 8 +; RVA22U64-NEXT: or a4, a4, a5 +; RVA22U64-NEXT: slli a2, a2, 16 ; RVA22U64-NEXT: slli a0, a0, 24 -; RVA22U64-NEXT: or a0, a0, a5 +; RVA22U64-NEXT: or a0, a0, a2 ; RVA22U64-NEXT: slli a6, a6, 24 -; RVA22U64-NEXT: or a2, a2, a4 -; RVA22U64-NEXT: add.uw a2, a6, a2 -; RVA22U64-NEXT: or a0, a0, a1 +; RVA22U64-NEXT: or a1, a1, a3 +; RVA22U64-NEXT: add.uw a1, a6, a1 +; RVA22U64-NEXT: or a0, a0, a4 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RVA22U64-NEXT: vmv.v.x v8, a2 +; RVA22U64-NEXT: vmv.v.x v8, a1 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-NEXT: ret ; ; RVA22U64-PACK-LABEL: buildvec_v16i8_undef_edges: ; RVA22U64-PACK: # %bb.0: -; RVA22U64-PACK-NEXT: lbu a7, 623(a0) -; RVA22U64-PACK-NEXT: lbu a6, 31(a0) -; RVA22U64-PACK-NEXT: lbu t0, 44(a0) +; RVA22U64-PACK-NEXT: lbu a1, 623(a0) +; RVA22U64-PACK-NEXT: lbu a2, 31(a0) +; RVA22U64-PACK-NEXT: lbu a3, 44(a0) ; RVA22U64-PACK-NEXT: lbu a4, 55(a0) ; RVA22U64-PACK-NEXT: lbu a5, 75(a0) -; RVA22U64-PACK-NEXT: lbu a2, 82(a0) -; RVA22U64-PACK-NEXT: lbu a1, 93(a0) +; RVA22U64-PACK-NEXT: packh a6, a3, a4 +; RVA22U64-PACK-NEXT: packh a1, a1, a5 +; RVA22U64-PACK-NEXT: lbu a4, 82(a0) +; RVA22U64-PACK-NEXT: lbu a5, 93(a0) ; RVA22U64-PACK-NEXT: lbu a3, 105(a0) ; RVA22U64-PACK-NEXT: lbu a0, 161(a0) -; RVA22U64-PACK-NEXT: packh a4, t0, a4 -; RVA22U64-PACK-NEXT: packh a5, a7, a5 -; 
RVA22U64-PACK-NEXT: packh a1, a2, a1 +; RVA22U64-PACK-NEXT: packh a4, a4, a5 ; RVA22U64-PACK-NEXT: packh a0, a3, a0 -; RVA22U64-PACK-NEXT: packh a2, a0, a0 -; RVA22U64-PACK-NEXT: packh a3, a0, a6 -; RVA22U64-PACK-NEXT: packw a3, a2, a3 -; RVA22U64-PACK-NEXT: packw a2, a2, a2 -; RVA22U64-PACK-NEXT: packw a4, a4, a5 -; RVA22U64-PACK-NEXT: packw a0, a1, a0 -; RVA22U64-PACK-NEXT: pack a1, a3, a4 +; RVA22U64-PACK-NEXT: packh a3, a0, a0 +; RVA22U64-PACK-NEXT: packh a2, a0, a2 +; RVA22U64-PACK-NEXT: packw a2, a3, a2 +; RVA22U64-PACK-NEXT: packw a3, a3, a3 +; RVA22U64-PACK-NEXT: packw a1, a6, a1 +; RVA22U64-PACK-NEXT: packw a0, a4, a0 +; RVA22U64-PACK-NEXT: pack a1, a2, a1 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RVA22U64-PACK-NEXT: vmv.v.x v8, a1 -; RVA22U64-PACK-NEXT: pack a0, a0, a2 +; RVA22U64-PACK-NEXT: pack a0, a0, a3 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-PACK-NEXT: ret ; @@ -2678,15 +2683,13 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV64ZVE32-NEXT: lbu a3, 44(a0) ; RV64ZVE32-NEXT: lbu a4, 55(a0) ; RV64ZVE32-NEXT: lbu a5, 75(a0) -; RV64ZVE32-NEXT: li a6, 255 -; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32-NEXT: vmv.s.x v0, a6 ; RV64ZVE32-NEXT: lbu a6, 82(a0) ; RV64ZVE32-NEXT: lbu a7, 93(a0) ; RV64ZVE32-NEXT: lbu t0, 105(a0) ; RV64ZVE32-NEXT: lbu a0, 161(a0) -; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a2 +; RV64ZVE32-NEXT: li a2, 255 ; RV64ZVE32-NEXT: vmv.v.x v9, a6 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a7 @@ -2696,6 +2699,9 @@ define <16 x i8> @buildvec_v16i8_undef_edges(ptr %p) { ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a0 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a5 ; RV64ZVE32-NEXT: vslidedown.vi v8, v9, 4 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a2 +; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64ZVE32-NEXT: ret %p4 = getelementptr i8, ptr %p, i32 31 @@ -2741,13 +2747,11 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV32-ONLY-NEXT: lbu a6, 82(a0) ; RV32-ONLY-NEXT: lbu a7, 93(a0) ; RV32-ONLY-NEXT: lbu t0, 124(a0) -; RV32-ONLY-NEXT: li t1, 255 -; RV32-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV32-ONLY-NEXT: vmv.s.x v0, t1 ; RV32-ONLY-NEXT: lbu t1, 144(a0) ; RV32-ONLY-NEXT: lbu a0, 154(a0) -; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV32-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-ONLY-NEXT: vmv.v.x v8, a1 +; RV32-ONLY-NEXT: li a1, 255 ; RV32-ONLY-NEXT: vmv.v.x v9, a6 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a7 @@ -2761,37 +2765,40 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, t1 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a0 +; RV32-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV32-ONLY-NEXT: vmv.s.x v0, a1 +; RV32-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV32-ONLY-NEXT: ret ; ; RV32VB-LABEL: buildvec_v16i8_loads_undef_scattered: ; RV32VB: # %bb.0: -; RV32VB-NEXT: lbu a1, 1(a0) -; RV32VB-NEXT: lbu a2, 0(a0) +; RV32VB-NEXT: lbu a1, 0(a0) +; RV32VB-NEXT: lbu a2, 1(a0) ; RV32VB-NEXT: lbu a3, 44(a0) ; RV32VB-NEXT: lbu a4, 55(a0) -; RV32VB-NEXT: slli a1, a1, 8 -; RV32VB-NEXT: or a1, a2, a1 -; RV32VB-NEXT: lbu a2, 75(a0) -; 
RV32VB-NEXT: lbu a5, 82(a0) -; RV32VB-NEXT: lbu a6, 93(a0) -; RV32VB-NEXT: lbu a7, 124(a0) +; RV32VB-NEXT: slli a2, a2, 8 ; RV32VB-NEXT: slli a4, a4, 8 +; RV32VB-NEXT: or a1, a1, a2 ; RV32VB-NEXT: or a3, a3, a4 -; RV32VB-NEXT: lbu a4, 144(a0) +; RV32VB-NEXT: lbu a2, 75(a0) +; RV32VB-NEXT: lbu a4, 82(a0) +; RV32VB-NEXT: lbu a5, 93(a0) +; RV32VB-NEXT: lbu a6, 124(a0) +; RV32VB-NEXT: slli a5, a5, 8 +; RV32VB-NEXT: or a4, a4, a5 +; RV32VB-NEXT: lbu a5, 144(a0) ; RV32VB-NEXT: lbu a0, 154(a0) -; RV32VB-NEXT: slli a6, a6, 8 -; RV32VB-NEXT: or a5, a5, a6 -; RV32VB-NEXT: slli a4, a4, 16 +; RV32VB-NEXT: slli a5, a5, 16 ; RV32VB-NEXT: slli a0, a0, 24 -; RV32VB-NEXT: or a0, a0, a4 +; RV32VB-NEXT: or a0, a0, a5 ; RV32VB-NEXT: slli a2, a2, 24 ; RV32VB-NEXT: or a2, a3, a2 -; RV32VB-NEXT: or a0, a7, a0 +; RV32VB-NEXT: or a0, a6, a0 ; RV32VB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-NEXT: vmv.v.x v8, a1 ; RV32VB-NEXT: vslide1down.vx v8, v8, a2 -; RV32VB-NEXT: vslide1down.vx v8, v8, a5 +; RV32VB-NEXT: vslide1down.vx v8, v8, a4 ; RV32VB-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-NEXT: ret ; @@ -2801,26 +2808,26 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV32VB-PACK-NEXT: lbu a2, 1(a0) ; RV32VB-PACK-NEXT: lbu a3, 44(a0) ; RV32VB-PACK-NEXT: lbu a4, 55(a0) -; RV32VB-PACK-NEXT: lbu a5, 75(a0) -; RV32VB-PACK-NEXT: lbu a6, 82(a0) -; RV32VB-PACK-NEXT: lbu a7, 93(a0) ; RV32VB-PACK-NEXT: packh a1, a1, a2 -; RV32VB-PACK-NEXT: lbu a2, 144(a0) -; RV32VB-PACK-NEXT: lbu t0, 154(a0) -; RV32VB-PACK-NEXT: packh a3, a3, a4 -; RV32VB-PACK-NEXT: lbu a0, 124(a0) -; RV32VB-PACK-NEXT: packh a4, a6, a7 -; RV32VB-PACK-NEXT: packh a2, a2, t0 -; RV32VB-PACK-NEXT: packh a5, a0, a5 -; RV32VB-PACK-NEXT: pack a3, a3, a5 -; RV32VB-PACK-NEXT: packh a5, a0, a0 -; RV32VB-PACK-NEXT: packh a0, a0, a0 -; RV32VB-PACK-NEXT: pack a0, a0, a2 -; RV32VB-PACK-NEXT: pack a1, a1, a5 +; RV32VB-PACK-NEXT: packh a2, a3, a4 +; RV32VB-PACK-NEXT: lbu a3, 75(a0) +; RV32VB-PACK-NEXT: lbu a4, 82(a0) +; RV32VB-PACK-NEXT: lbu a5, 93(a0) +; RV32VB-PACK-NEXT: lbu a6, 124(a0) +; RV32VB-PACK-NEXT: packh a4, a4, a5 +; RV32VB-PACK-NEXT: lbu a5, 144(a0) +; RV32VB-PACK-NEXT: lbu a0, 154(a0) +; RV32VB-PACK-NEXT: packh a0, a5, a0 +; RV32VB-PACK-NEXT: packh a3, a0, a3 +; RV32VB-PACK-NEXT: pack a2, a2, a3 +; RV32VB-PACK-NEXT: packh a3, a0, a0 +; RV32VB-PACK-NEXT: packh a5, a6, a0 +; RV32VB-PACK-NEXT: pack a0, a5, a0 +; RV32VB-PACK-NEXT: pack a1, a1, a3 ; RV32VB-PACK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32VB-PACK-NEXT: vmv.v.x v8, a1 -; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a3 -; RV32VB-PACK-NEXT: pack a1, a4, a5 +; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a2 +; RV32VB-PACK-NEXT: pack a1, a4, a3 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a1 ; RV32VB-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RV32VB-PACK-NEXT: ret @@ -2835,13 +2842,11 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV64V-ONLY-NEXT: lbu a6, 82(a0) ; RV64V-ONLY-NEXT: lbu a7, 93(a0) ; RV64V-ONLY-NEXT: lbu t0, 124(a0) -; RV64V-ONLY-NEXT: li t1, 255 -; RV64V-ONLY-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64V-ONLY-NEXT: vmv.s.x v0, t1 ; RV64V-ONLY-NEXT: lbu t1, 144(a0) ; RV64V-ONLY-NEXT: lbu a0, 154(a0) -; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64V-ONLY-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-ONLY-NEXT: vmv.v.x v8, a1 +; RV64V-ONLY-NEXT: li a1, 255 ; RV64V-ONLY-NEXT: vmv.v.x v9, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a7 @@ -2855,39 +2860,42 @@ define <16 x i8> 
@buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, t1 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a0 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64V-ONLY-NEXT: vmv.s.x v0, a1 +; RV64V-ONLY-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_v16i8_loads_undef_scattered: ; RVA22U64: # %bb.0: -; RVA22U64-NEXT: lbu a1, 1(a0) -; RVA22U64-NEXT: lbu a2, 0(a0) +; RVA22U64-NEXT: lbu a1, 0(a0) +; RVA22U64-NEXT: lbu a2, 1(a0) ; RVA22U64-NEXT: lbu a3, 44(a0) ; RVA22U64-NEXT: lbu a4, 55(a0) -; RVA22U64-NEXT: slli a1, a1, 8 -; RVA22U64-NEXT: or a6, a2, a1 -; RVA22U64-NEXT: lbu a7, 75(a0) -; RVA22U64-NEXT: lbu a5, 82(a0) -; RVA22U64-NEXT: lbu a1, 93(a0) -; RVA22U64-NEXT: lbu a2, 124(a0) +; RVA22U64-NEXT: slli a2, a2, 8 ; RVA22U64-NEXT: slli a3, a3, 32 ; RVA22U64-NEXT: slli a4, a4, 40 +; RVA22U64-NEXT: or a6, a1, a2 ; RVA22U64-NEXT: or a3, a3, a4 -; RVA22U64-NEXT: lbu a4, 144(a0) +; RVA22U64-NEXT: lbu a2, 75(a0) +; RVA22U64-NEXT: lbu a4, 82(a0) +; RVA22U64-NEXT: lbu a5, 93(a0) +; RVA22U64-NEXT: lbu a1, 124(a0) +; RVA22U64-NEXT: slli a5, a5, 8 +; RVA22U64-NEXT: or a4, a4, a5 +; RVA22U64-NEXT: lbu a5, 144(a0) ; RVA22U64-NEXT: lbu a0, 154(a0) -; RVA22U64-NEXT: slli a1, a1, 8 -; RVA22U64-NEXT: or a1, a1, a5 -; RVA22U64-NEXT: slli a4, a4, 48 +; RVA22U64-NEXT: slli a5, a5, 48 ; RVA22U64-NEXT: slli a0, a0, 56 -; RVA22U64-NEXT: or a0, a0, a4 -; RVA22U64-NEXT: slli a7, a7, 56 -; RVA22U64-NEXT: or a3, a7, a3 -; RVA22U64-NEXT: slli a2, a2, 32 -; RVA22U64-NEXT: or a0, a0, a2 -; RVA22U64-NEXT: or a2, a6, a3 +; RVA22U64-NEXT: or a0, a0, a5 +; RVA22U64-NEXT: slli a2, a2, 56 +; RVA22U64-NEXT: or a2, a2, a3 +; RVA22U64-NEXT: slli a1, a1, 32 ; RVA22U64-NEXT: or a0, a0, a1 +; RVA22U64-NEXT: or a1, a6, a2 +; RVA22U64-NEXT: or a0, a0, a4 ; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RVA22U64-NEXT: vmv.v.x v8, a2 +; RVA22U64-NEXT: vmv.v.x v8, a1 ; RVA22U64-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-NEXT: ret ; @@ -2895,29 +2903,29 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: lbu a1, 0(a0) ; RVA22U64-PACK-NEXT: lbu a2, 1(a0) -; RVA22U64-PACK-NEXT: lbu a7, 44(a0) -; RVA22U64-PACK-NEXT: lbu t0, 55(a0) -; RVA22U64-PACK-NEXT: lbu a6, 75(a0) -; RVA22U64-PACK-NEXT: lbu a5, 82(a0) -; RVA22U64-PACK-NEXT: lbu a3, 93(a0) -; RVA22U64-PACK-NEXT: packh t1, a1, a2 -; RVA22U64-PACK-NEXT: lbu a2, 144(a0) -; RVA22U64-PACK-NEXT: lbu a4, 154(a0) -; RVA22U64-PACK-NEXT: packh a1, a7, t0 -; RVA22U64-PACK-NEXT: lbu a0, 124(a0) -; RVA22U64-PACK-NEXT: packh a3, a5, a3 -; RVA22U64-PACK-NEXT: packh a2, a2, a4 -; RVA22U64-PACK-NEXT: packh a4, a0, a6 -; RVA22U64-PACK-NEXT: packw a1, a1, a4 -; RVA22U64-PACK-NEXT: packh a4, a0, a0 -; RVA22U64-PACK-NEXT: packh a0, a0, a0 -; RVA22U64-PACK-NEXT: packw a5, t1, a4 -; RVA22U64-PACK-NEXT: packw a0, a0, a2 -; RVA22U64-PACK-NEXT: packw a2, a3, a4 -; RVA22U64-PACK-NEXT: pack a1, a5, a1 -; RVA22U64-PACK-NEXT: pack a0, a2, a0 +; RVA22U64-PACK-NEXT: lbu a3, 44(a0) +; RVA22U64-PACK-NEXT: lbu a4, 55(a0) +; RVA22U64-PACK-NEXT: packh a6, a1, a2 +; RVA22U64-PACK-NEXT: packh a2, a3, a4 +; RVA22U64-PACK-NEXT: lbu a3, 75(a0) +; RVA22U64-PACK-NEXT: lbu a4, 82(a0) +; RVA22U64-PACK-NEXT: lbu a5, 93(a0) +; RVA22U64-PACK-NEXT: lbu a1, 124(a0) +; RVA22U64-PACK-NEXT: packh a4, a4, a5 +; RVA22U64-PACK-NEXT: lbu a5, 144(a0) +; RVA22U64-PACK-NEXT: lbu a0, 154(a0) +; 
RVA22U64-PACK-NEXT: packh a0, a5, a0 +; RVA22U64-PACK-NEXT: packh a3, a0, a3 +; RVA22U64-PACK-NEXT: packw a2, a2, a3 +; RVA22U64-PACK-NEXT: packh a3, a0, a0 +; RVA22U64-PACK-NEXT: packh a1, a1, a0 +; RVA22U64-PACK-NEXT: packw a5, a6, a3 +; RVA22U64-PACK-NEXT: packw a0, a1, a0 +; RVA22U64-PACK-NEXT: packw a1, a4, a3 +; RVA22U64-PACK-NEXT: pack a2, a5, a2 +; RVA22U64-PACK-NEXT: pack a0, a1, a0 ; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RVA22U64-PACK-NEXT: vmv.v.x v8, a1 +; RVA22U64-PACK-NEXT: vmv.v.x v8, a2 ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a0 ; RVA22U64-PACK-NEXT: ret ; @@ -2931,13 +2939,11 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV64ZVE32-NEXT: lbu a6, 82(a0) ; RV64ZVE32-NEXT: lbu a7, 93(a0) ; RV64ZVE32-NEXT: lbu t0, 124(a0) -; RV64ZVE32-NEXT: li t1, 255 -; RV64ZVE32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; RV64ZVE32-NEXT: vmv.s.x v0, t1 ; RV64ZVE32-NEXT: lbu t1, 144(a0) ; RV64ZVE32-NEXT: lbu a0, 154(a0) -; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, mu +; RV64ZVE32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64ZVE32-NEXT: vmv.v.x v8, a1 +; RV64ZVE32-NEXT: li a1, 255 ; RV64ZVE32-NEXT: vmv.v.x v9, a6 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a7 @@ -2951,6 +2957,9 @@ define <16 x i8> @buildvec_v16i8_loads_undef_scattered(ptr %p) { ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, t1 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v0, a1 +; RV64ZVE32-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 8, v0.t ; RV64ZVE32-NEXT: ret %p2 = getelementptr i8, ptr %p, i32 1 @@ -3011,13 +3020,13 @@ define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 % ; RV32-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV32-ONLY-NEXT: vmv.v.x v8, a0 ; RV32-ONLY-NEXT: vmv.v.x v9, a4 -; RV32-ONLY-NEXT: vmv.v.i v0, 15 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a5 ; RV32-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV32-ONLY-NEXT: vslide1down.vx v9, v9, a6 ; RV32-ONLY-NEXT: vslide1down.vx v10, v8, a3 ; RV32-ONLY-NEXT: vslide1down.vx v8, v9, a7 +; RV32-ONLY-NEXT: vmv.v.i v0, 15 ; RV32-ONLY-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV32-ONLY-NEXT: ret ; @@ -3064,13 +3073,13 @@ define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 % ; RV64V-ONLY-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64V-ONLY-NEXT: vmv.v.x v8, a0 ; RV64V-ONLY-NEXT: vmv.v.x v9, a4 -; RV64V-ONLY-NEXT: vmv.v.i v0, 15 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a5 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 ; RV64V-ONLY-NEXT: vslide1down.vx v9, v9, a6 ; RV64V-ONLY-NEXT: vslide1down.vx v10, v8, a3 ; RV64V-ONLY-NEXT: vslide1down.vx v8, v9, a7 +; RV64V-ONLY-NEXT: vmv.v.i v0, 15 ; RV64V-ONLY-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64V-ONLY-NEXT: ret ; @@ -3119,13 +3128,13 @@ define <8 x i8> @buildvec_v8i8_pack(i8 %e1, i8 %e2, i8 %e3, i8 %e4, i8 %e5, i8 % ; RV64ZVE32-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; RV64ZVE32-NEXT: vmv.v.x v8, a0 ; RV64ZVE32-NEXT: vmv.v.x v9, a4 -; RV64ZVE32-NEXT: vmv.v.i v0, 15 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a5 ; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32-NEXT: vslide1down.vx v9, v9, a6 ; RV64ZVE32-NEXT: vslide1down.vx v10, v8, a3 ; RV64ZVE32-NEXT: vslide1down.vx v8, v9, a7 +; RV64ZVE32-NEXT: 
vmv.v.i v0, 15 ; RV64ZVE32-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32-NEXT: ret %v1 = insertelement <8 x i8> poison, i8 %e1, i32 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index beaf75d5b0cfa..f01ead3fea62f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -54,7 +54,6 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; V128-NEXT: csrr a0, vlenb ; V128-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; V128-NEXT: vid.v v10 -; V128-NEXT: vmv.v.i v0, 10 ; V128-NEXT: srli a0, a0, 3 ; V128-NEXT: vsrl.vi v10, v10, 1 ; V128-NEXT: vslidedown.vx v11, v10, a0 @@ -63,6 +62,7 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; V128-NEXT: vrgatherei16.vv v12, v9, v10 ; V128-NEXT: vrgatherei16.vv v15, v8, v11 ; V128-NEXT: vrgatherei16.vv v14, v8, v10 +; V128-NEXT: vmv.v.i v0, 10 ; V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; V128-NEXT: vmerge.vvm v8, v14, v12, v0 ; V128-NEXT: ret @@ -72,9 +72,9 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; RV32-V512-NEXT: vsetivli zero, 4, e16, mf4, ta, ma ; RV32-V512-NEXT: vid.v v10 ; RV32-V512-NEXT: vsrl.vi v11, v10, 1 -; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; RV32-V512-NEXT: vrgatherei16.vv v10, v8, v11 +; RV32-V512-NEXT: vmv.v.i v0, 10 ; RV32-V512-NEXT: vrgatherei16.vv v10, v9, v11, v0.t ; RV32-V512-NEXT: vmv.v.v v8, v10 ; RV32-V512-NEXT: ret @@ -84,8 +84,8 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) { ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, mu ; RV64-V512-NEXT: vid.v v10 ; RV64-V512-NEXT: vsrl.vi v11, v10, 1 -; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v8, v11 +; RV64-V512-NEXT: vmv.v.i v0, 10 ; RV64-V512-NEXT: vrgather.vv v10, v9, v11, v0.t ; RV64-V512-NEXT: vmv.v.v v8, v10 ; RV64-V512-NEXT: ret @@ -421,8 +421,8 @@ define <64 x i32> @interleave_v32i32(<32 x i32> %x, <32 x i32> %y) { ; V128-NEXT: vzext.vf2 v8, v24 ; V128-NEXT: addi a1, a1, -1366 ; V128-NEXT: vzext.vf2 v24, v0 -; V128-NEXT: vmv.s.x v0, a1 ; V128-NEXT: vsll.vx v8, v8, a0 +; V128-NEXT: vmv.s.x v0, a1 ; V128-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; V128-NEXT: vmerge.vvm v24, v24, v8, v0 ; V128-NEXT: addi a0, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index d7120b4a16938..af2ac99354db1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -86,8 +86,8 @@ define <4 x i16> @vrgather_shuffle_vv_v4i16(<4 x i16> %x, <4 x i16> %y) { ; CHECK-NEXT: addi a0, a0, %lo(.LCPI6_0) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, mu ; CHECK-NEXT: vle16.v v11, (a0) -; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vi v10, v9, 1, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -155,20 +155,18 @@ define <8 x i64> @vrgather_permute_shuffle_uv_v8i64(<8 x i64> %x) { define <8 x i64> @vrgather_shuffle_vv_v8i64(<8 x i64> %x, <8 x i64> %y) { ; RV32-LABEL: vrgather_shuffle_vv_v8i64: ; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vmv.v.i v16, 2 +; RV32-NEXT: li a0, 5 +; RV32-NEXT: vslide1down.vx v20, v16, a0 ; RV32-NEXT: lui a0, %hi(.LCPI11_0) ; RV32-NEXT: addi a0, a0, %lo(.LCPI11_0) -; RV32-NEXT: vsetivli 
zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v20, (a0) -; RV32-NEXT: vmv.v.i v21, 2 +; RV32-NEXT: vle16.v v21, (a0) ; RV32-NEXT: li a0, 164 -; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v16, v8, v20 -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: li a0, 5 -; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32-NEXT: vslide1down.vx v8, v21, a0 ; RV32-NEXT: vsetvli zero, zero, e64, m4, ta, mu -; RV32-NEXT: vrgatherei16.vv v16, v12, v8, v0.t +; RV32-NEXT: vrgatherei16.vv v16, v8, v21 +; RV32-NEXT: vmv.s.x v0, a0 +; RV32-NEXT: vrgatherei16.vv v16, v12, v20, v0.t ; RV32-NEXT: vmv.v.v v8, v16 ; RV32-NEXT: ret ; @@ -211,8 +209,8 @@ define <8 x i64> @vrgather_shuffle_xv_v8i64(<8 x i64> %x) { ; RV32-NEXT: addi a0, a0, %lo(.LCPI12_1) ; RV32-NEXT: vle16.v v21, (a0) ; RV32-NEXT: li a0, 113 -; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: vrgatherei16.vv v12, v16, v20 +; RV32-NEXT: vmv.s.x v0, a0 ; RV32-NEXT: vrgatherei16.vv v12, v8, v21, v0.t ; RV32-NEXT: vmv.v.v v8, v12 ; RV32-NEXT: ret @@ -365,10 +363,10 @@ define <8 x i8> @splat_ve4_ins_i1ve3(<8 x i8> %v) { define <8 x i8> @splat_ve2_we0(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, 66 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v8, 2 +; CHECK-NEXT: li a0, 66 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -385,9 +383,9 @@ define <8 x i8> @splat_ve2_we0_ins_i0ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; CHECK-NEXT: vmv.s.x v11, a0 ; CHECK-NEXT: li a0, 66 -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -400,10 +398,10 @@ define <8 x i8> @splat_ve2_we0_ins_i0we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vrgather.vi v10, v8, 2 -; CHECK-NEXT: li a0, 67 -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v8, 4 +; CHECK-NEXT: li a0, 67 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v9, v8, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 @@ -420,9 +418,9 @@ define <8 x i8> @splat_ve2_we0_ins_i2ve4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.x v11, a0 ; CHECK-NEXT: li a0, 66 -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vrgather.vv v10, v8, v11 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgather.vi v10, v9, 0, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret @@ -434,16 +432,16 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0_ins_i2we4: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 4 +; CHECK-NEXT: vmv.v.i v11, 4 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vrgather.vi v10, v8, 2 ; CHECK-NEXT: li a0, 70 ; CHECK-NEXT: vsetivli zero, 3, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v11, v10, 2 +; CHECK-NEXT: vslideup.vi v12, v11, 2 ; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vrgather.vi v10, v8, 2 -; CHECK-NEXT: vrgather.vv v10, v9, 
v11, v0.t +; CHECK-NEXT: vrgather.vv v10, v9, v12, v0.t ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: ret %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> @@ -453,13 +451,13 @@ define <8 x i8> @splat_ve2_we0_ins_i2we4(<8 x i8> %v, <8 x i8> %w) { define <8 x i8> @splat_ve2_we0_ins_i2ve4_i5we6(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: splat_ve2_we0_ins_i2ve4_i5we6: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI26_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI26_0) -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vle8.v v10, (a0) ; CHECK-NEXT: li a0, 20 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI26_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI26_0) ; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vle8.v v10, (a0) ; CHECK-NEXT: vrgather.vv v8, v9, v10 ; CHECK-NEXT: ret %shuff = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> @@ -670,10 +668,10 @@ define <8 x i8> @merge_slidedown(<8 x i8> %v, <8 x i8> %w) { define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: merge_non_contiguous_slideup_slidedown: ; CHECK: # %bb.0: -; CHECK-NEXT: li a0, -22 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu -; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslidedown.vi v8, v8, 2 +; CHECK-NEXT: li a0, -22 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vslideup.vi v8, v9, 1, v0.t ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> @@ -684,13 +682,13 @@ define <8 x i8> @merge_non_contiguous_slideup_slidedown(<8 x i8> %v, <8 x i8> %w define <8 x i8> @unmergable(<8 x i8> %v, <8 x i8> %w) { ; CHECK-LABEL: unmergable: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI46_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vle8.v v10, (a0) ; CHECK-NEXT: li a0, 84 +; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI46_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI46_0) ; CHECK-NEXT: vmerge.vvm v9, v9, v8, v0 +; CHECK-NEXT: vle8.v v10, (a0) ; CHECK-NEXT: vrgather.vv v8, v9, v10 ; CHECK-NEXT: ret %res = shufflevector <8 x i8> %v, <8 x i8> %w, <8 x i32> @@ -1080,18 +1078,18 @@ define <16 x i64> @shuffle_zipodd_v16i64(<16 x i64> %v1, <16 x i64> %v2) { define <16 x i32> @shuffle_disjoint_lanes(<16 x i32> %v, <16 x i32> %w) { ; CHECK-LABEL: shuffle_disjoint_lanes: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI74_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI74_0) -; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vle8.v v16, (a0) ; CHECK-NEXT: lui a0, 11 ; CHECK-NEXT: addi a0, a0, -1366 +; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI74_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI74_0) ; CHECK-NEXT: vmerge.vvm v12, v12, v8, v0 +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vsext.vf2 v18, v16 +; CHECK-NEXT: vsext.vf2 v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vrgatherei16.vv v8, v12, v18 +; CHECK-NEXT: vrgatherei16.vv v8, v12, v16 ; CHECK-NEXT: ret %out = shufflevector <16 x i32> %v, <16 x i32> %w, <16 x i32> ret <16 x i32> %out @@ -1118,12 +1116,12 @@ define <16 x i32> @shuffle_disjoint_lanes_one_broadcast(<16 x i32> %v, <16 x i32 ; CHECK-NEXT: lui a0, %hi(.LCPI76_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI76_0) ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vle16.v v20, (a0) +; CHECK-NEXT: vrgather.vi v16, v8, 
7 +; CHECK-NEXT: vle16.v v8, (a0) ; CHECK-NEXT: lui a0, 15 ; CHECK-NEXT: addi a0, a0, 240 ; CHECK-NEXT: vmv.s.x v0, a0 -; CHECK-NEXT: vrgather.vi v16, v8, 7 -; CHECK-NEXT: vrgatherei16.vv v16, v12, v20, v0.t +; CHECK-NEXT: vrgatherei16.vv v16, v12, v8, v0.t ; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: ret %out = shufflevector <16 x i32> %v, <16 x i32> %w, <16 x i32> @@ -1133,14 +1131,14 @@ define <16 x i32> @shuffle_disjoint_lanes_one_broadcast(<16 x i32> %v, <16 x i32 define <16 x i32> @shuffle_disjoint_lanes_one_splat(i32 %v, <16 x i32> %w) { ; CHECK-LABEL: shuffle_disjoint_lanes_one_splat: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI77_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI77_0) ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, mu -; CHECK-NEXT: vle16.v v16, (a1) -; CHECK-NEXT: lui a1, 15 -; CHECK-NEXT: addi a1, a1, 240 -; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vmv.v.x v12, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI77_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI77_0) +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: lui a0, 15 +; CHECK-NEXT: addi a0, a0, 240 +; CHECK-NEXT: vmv.s.x v0, a0 ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16, v0.t ; CHECK-NEXT: vmv.v.v v8, v12 ; CHECK-NEXT: ret @@ -1269,14 +1267,14 @@ define void @shuffle_i128_ldst(ptr %p) { define void @shuffle_i256_ldst(ptr %p) { ; CHECK-LABEL: shuffle_i256_ldst: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma +; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: lui a1, %hi(.LCPI80_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI80_0) -; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vle8.v v8, (a1) -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vsext.vf2 v10, v8 +; CHECK-NEXT: vle8.v v16, (a1) +; CHECK-NEXT: vsext.vf2 v18, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vrgatherei16.vv v24, v16, v10 +; CHECK-NEXT: vrgatherei16.vv v24, v8, v18 ; CHECK-NEXT: vse64.v v24, (a0) ; CHECK-NEXT: ret %a = load <4 x i256>, ptr %p @@ -1393,8 +1391,8 @@ define <16 x i32> @shuffle_m2_prefix(<16 x i32> %a) { ; CHECK-NEXT: lui a0, %hi(.LCPI85_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI85_0) ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle16.v v14, (a0) -; CHECK-NEXT: vrgatherei16.vv v12, v8, v14 +; CHECK-NEXT: vle16.v v10, (a0) +; CHECK-NEXT: vrgatherei16.vv v12, v8, v10 ; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret %out = shufflevector <16 x i32> %a, <16 x i32> poison, <16 x i32> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll index 32c1f2ca32fab..a5e730d47395d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-vrgather.ll @@ -77,10 +77,10 @@ define void @gather_const_v2i64(ptr %x) { define void @gather_const_v64i8(ptr %x) { ; CHECK-LABEL: gather_const_v64i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lbu a1, 32(a0) -; CHECK-NEXT: li a2, 64 -; CHECK-NEXT: vsetvli zero, a2, e8, m4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: li a1, 64 +; CHECK-NEXT: lbu a2, 32(a0) +; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a2 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <64 x i8>, ptr %x @@ -94,10 +94,10 @@ define void @gather_const_v64i8(ptr %x) { define void @gather_const_v16i16(ptr %x) { ; CHECK-LABEL: gather_const_v16i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lh a1, 50(a0) -; CHECK-NEXT: li a2, 32 -; CHECK-NEXT: vsetvli zero, a2, e16, m4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: li a1, 32 +; CHECK-NEXT: lh 
a2, 50(a0) +; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma +; CHECK-NEXT: vmv.v.x v8, a2 ; CHECK-NEXT: vse16.v v8, (a0) ; CHECK-NEXT: ret %a = load <32 x i16>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 392709fdb4cf7..e6514cfe7d473 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1046,46 +1046,45 @@ define void @mulhu_v16i8(ptr %x) { ; CHECK-LABEL: mulhu_v16i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vle8.v v10, (a0) ; CHECK-NEXT: lui a1, 3 -; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: lui a2, %hi(.LCPI65_0) -; CHECK-NEXT: addi a2, a2, %lo(.LCPI65_0) -; CHECK-NEXT: vle8.v v11, (a2) +; CHECK-NEXT: vmv.v.i v11, 0 +; CHECK-NEXT: li a2, 513 +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: addi a1, a1, -2044 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: addi a1, a2, 32 -; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vmv.s.x v9, a1 ; CHECK-NEXT: li a1, -128 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vxm v12, v10, a1, v0 -; CHECK-NEXT: li a1, 513 -; CHECK-NEXT: vmv.v.i v13, 4 +; CHECK-NEXT: vmerge.vxm v12, v11, a1, v0 +; CHECK-NEXT: lui a1, %hi(.LCPI65_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI65_0) +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: vmerge.vim v9, v11, 1, v0 +; CHECK-NEXT: vmv.v.i v11, 4 ; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v8, v10, 1, v0 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: vmerge.vim v8, v11, 1, v0 +; CHECK-NEXT: vle8.v v11, (a1) ; CHECK-NEXT: addi a1, a2, 78 -; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vmerge.vim v10, v13, 1, v0 +; CHECK-NEXT: vsrl.vv v9, v10, v9 +; CHECK-NEXT: vmulhu.vv v9, v9, v11 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vsrl.vv v8, v9, v8 -; CHECK-NEXT: vmulhu.vv v8, v8, v11 -; CHECK-NEXT: vmerge.vim v10, v10, 3, v0 +; CHECK-NEXT: vmerge.vim v8, v8, 3, v0 ; CHECK-NEXT: lui a1, 8 ; CHECK-NEXT: addi a1, a1, 304 -; CHECK-NEXT: vsub.vv v9, v9, v8 -; CHECK-NEXT: vmulhu.vv v9, v9, v12 +; CHECK-NEXT: vsub.vv v10, v10, v9 +; CHECK-NEXT: vmulhu.vv v10, v10, v12 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; CHECK-NEXT: vadd.vv v8, v9, v8 -; CHECK-NEXT: vmerge.vim v9, v10, 2, v0 -; CHECK-NEXT: vsrl.vv v8, v8, v9 +; CHECK-NEXT: vadd.vv v9, v10, v9 +; CHECK-NEXT: vmerge.vim v8, v8, 2, v0 +; CHECK-NEXT: vsrl.vv v8, v9, v8 ; CHECK-NEXT: vse8.v v8, (a0) ; CHECK-NEXT: ret %a = load <16 x i8>, ptr %x @@ -1108,20 +1107,20 @@ define void @mulhu_v8i16(ptr %x) { ; CHECK-NEXT: addi a1, a1, %lo(.LCPI66_0) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vmv.v.i v11, 3 -; CHECK-NEXT: vle16.v v12, (a1) -; CHECK-NEXT: vmerge.vim v11, v11, 2, v0 -; CHECK-NEXT: vmv1r.v v13, v9 +; CHECK-NEXT: vmv1r.v v12, v9 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vi v9, v10, 6 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vmerge.vim v11, v11, 2, v0 +; CHECK-NEXT: vle16.v v13, (a1) ; CHECK-NEXT: vsrl.vv v9, v8, v9 -; CHECK-NEXT: vmulhu.vv v9, v9, v12 +; CHECK-NEXT: vmulhu.vv v9, v9, v13 ; CHECK-NEXT: lui a1, 
1048568 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, tu, ma -; CHECK-NEXT: vmv.s.x v13, a1 +; CHECK-NEXT: vmv.s.x v12, a1 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vsub.vv v8, v8, v9 -; CHECK-NEXT: vmulhu.vv v8, v8, v13 +; CHECK-NEXT: vmulhu.vv v8, v8, v12 ; CHECK-NEXT: vadd.vv v8, v8, v9 ; CHECK-NEXT: vsetivli zero, 7, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vi v11, v10, 6 @@ -1162,13 +1161,13 @@ define void @mulhu_v4i32(ptr %x) { ; CHECK-NEXT: vmv.s.x v10, a1 ; CHECK-NEXT: lui a1, %hi(.LCPI68_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI68_0) -; CHECK-NEXT: vle32.v v11, (a1) ; CHECK-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v9, v10, 2 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vle32.v v10, (a1) ; CHECK-NEXT: lui a1, 4128 ; CHECK-NEXT: addi a1, a1, 514 -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vmulhu.vv v10, v8, v11 +; CHECK-NEXT: vmulhu.vv v10, v8, v10 ; CHECK-NEXT: vsub.vv v8, v8, v10 ; CHECK-NEXT: vmulhu.vv v8, v8, v9 ; CHECK-NEXT: vmv.s.x v9, a1 @@ -1206,8 +1205,6 @@ define void @mulhu_v2i64(ptr %x) { ; ; RV64-LABEL: mulhu_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: lui a1, 838861 ; RV64-NEXT: lui a2, 699051 ; RV64-NEXT: addiw a1, a1, -819 @@ -1216,6 +1213,8 @@ define void @mulhu_v2i64(ptr %x) { ; RV64-NEXT: add a1, a1, a3 ; RV64-NEXT: slli a3, a2, 32 ; RV64-NEXT: add a2, a2, a3 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: vmv.v.x v9, a1 ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma ; RV64-NEXT: vmv.s.x v9, a2 @@ -1322,10 +1321,10 @@ define void @mulhs_v4i32(ptr %x) { ; ; RV64-LABEL: mulhs_v4i32: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI73_0) -; RV64-NEXT: ld a1, %lo(.LCPI73_0)(a1) ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: lui a1, %hi(.LCPI73_0) +; RV64-NEXT: ld a1, %lo(.LCPI73_0)(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vmv.v.x v9, a1 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma @@ -3153,27 +3152,27 @@ define void @mulhu_v32i8(ptr %x) { ; CHECK-LABEL: mulhu_v32i8: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: lui a2, 163907 -; CHECK-NEXT: addi a2, a2, -2044 -; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; CHECK-NEXT: vmv.s.x v0, a2 -; CHECK-NEXT: lui a2, 66049 -; CHECK-NEXT: addi a2, a2, 32 -; CHECK-NEXT: vmv.s.x v8, a2 -; CHECK-NEXT: li a2, -128 -; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma +; CHECK-NEXT: lui a2, %hi(.LCPI181_0) +; CHECK-NEXT: addi a2, a2, %lo(.LCPI181_0) +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle8.v v10, (a2) +; CHECK-NEXT: lui a1, 163907 +; CHECK-NEXT: addi a1, a1, -2044 +; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: lui a1, 66049 +; CHECK-NEXT: addi a1, a1, 32 +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: li a1, -128 +; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vmv.v.i v12, 0 -; CHECK-NEXT: vmerge.vxm v10, v12, a2, v0 -; CHECK-NEXT: lui a1, %hi(.LCPI181_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI181_0) -; CHECK-NEXT: vle8.v v14, (a0) +; CHECK-NEXT: vmerge.vxm v14, v12, a1, v0 +; CHECK-NEXT: lui a1, 8208 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 -; CHECK-NEXT: vle8.v v12, (a1) -; CHECK-NEXT: lui a1, 8208 +; CHECK-NEXT: vle8.v v12, (a0) ; CHECK-NEXT: addi a1, a1, 513 -; CHECK-NEXT: vsrl.vv v8, v14, v8 -; CHECK-NEXT: vmulhu.vv v12, v8, v12 +; CHECK-NEXT: vsrl.vv v8, v12, v8 +; CHECK-NEXT: vmulhu.vv v10, v8, 
v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: lui a1, 66785 @@ -3181,8 +3180,8 @@ define void @mulhu_v32i8(ptr %x) { ; CHECK-NEXT: vmv.s.x v8, a1 ; CHECK-NEXT: lui a1, 529160 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; CHECK-NEXT: vsub.vv v14, v14, v12 -; CHECK-NEXT: vmulhu.vv v10, v14, v10 +; CHECK-NEXT: vsub.vv v12, v12, v10 +; CHECK-NEXT: vmulhu.vv v12, v12, v14 ; CHECK-NEXT: vmv.v.i v14, 4 ; CHECK-NEXT: addi a1, a1, 304 ; CHECK-NEXT: vmerge.vim v14, v14, 1, v0 @@ -3191,7 +3190,7 @@ define void @mulhu_v32i8(ptr %x) { ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e8, m2, ta, ma ; CHECK-NEXT: vmerge.vim v14, v14, 3, v0 -; CHECK-NEXT: vadd.vv v10, v10, v12 +; CHECK-NEXT: vadd.vv v10, v12, v10 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v8, v14, 2, v0 ; CHECK-NEXT: vsrl.vv v8, v10, v8 @@ -3291,11 +3290,11 @@ define void @mulhu_v8i32(ptr %x) { ; CHECK-NEXT: li a1, 68 ; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmv.s.x v0, a1 +; CHECK-NEXT: lui a1, 524288 +; CHECK-NEXT: vmerge.vxm v10, v10, a1, v0 ; CHECK-NEXT: lui a1, %hi(.LCPI183_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI183_0) ; CHECK-NEXT: vle32.v v12, (a1) -; CHECK-NEXT: lui a1, 524288 -; CHECK-NEXT: vmerge.vxm v10, v10, a1, v0 ; CHECK-NEXT: lui a1, 4128 ; CHECK-NEXT: addi a1, a1, 514 ; CHECK-NEXT: vmulhu.vv v12, v8, v12 @@ -3450,10 +3449,10 @@ define void @mulhs_v8i32(ptr %x) { ; ; RV64-LABEL: mulhs_v8i32: ; RV64: # %bb.0: -; RV64-NEXT: lui a1, %hi(.LCPI187_0) -; RV64-NEXT: ld a1, %lo(.LCPI187_0)(a1) ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) +; RV64-NEXT: lui a1, %hi(.LCPI187_0) +; RV64-NEXT: ld a1, %lo(.LCPI187_0)(a1) ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vmv.v.x v10, a1 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma @@ -3507,6 +3506,8 @@ define void @mulhs_v4i64(ptr %x) { ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vle64.v v8, (a0) ; RV64-NEXT: lui a1, 349525 +; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma +; RV64-NEXT: vmv.v.i v0, 5 ; RV64-NEXT: lui a2, 1044496 ; RV64-NEXT: addiw a1, a1, 1365 ; RV64-NEXT: addi a2, a2, -256 @@ -3514,12 +3515,10 @@ define void @mulhs_v4i64(ptr %x) { ; RV64-NEXT: slli a2, a1, 32 ; RV64-NEXT: add a1, a1, a2 ; RV64-NEXT: lui a2, %hi(.LCPI188_0) -; RV64-NEXT: ld a2, %lo(.LCPI188_0)(a2) -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.v.i v0, 5 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vmv.v.x v12, a1 ; RV64-NEXT: li a1, 63 +; RV64-NEXT: ld a2, %lo(.LCPI188_0)(a2) ; RV64-NEXT: vmerge.vxm v12, v12, a2, v0 ; RV64-NEXT: lui a2, 4096 ; RV64-NEXT: addi a2, a2, 256 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll index b65352aed2d52..211c434c65743 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access-zve32x.ll @@ -7,55 +7,53 @@ define <4 x i1> @load_large_vector(ptr %p) { ; ZVE32X-LABEL: load_large_vector: ; ZVE32X: # %bb.0: -; ZVE32X-NEXT: ld a1, 0(a0) -; ZVE32X-NEXT: ld a2, 8(a0) -; ZVE32X-NEXT: ld a3, 24(a0) -; ZVE32X-NEXT: ld a4, 32(a0) -; ZVE32X-NEXT: ld a5, 48(a0) -; ZVE32X-NEXT: ld a6, 56(a0) -; ZVE32X-NEXT: ld a7, 72(a0) -; ZVE32X-NEXT: ld a0, 80(a0) +; ZVE32X-NEXT: ld a1, 48(a0) +; ZVE32X-NEXT: ld a2, 56(a0) +; ZVE32X-NEXT: ld a3, 72(a0) +; ZVE32X-NEXT: ld a4, 80(a0) +; ZVE32X-NEXT: ld a5, 0(a0) +; ZVE32X-NEXT: ld 
a6, 8(a0) +; ZVE32X-NEXT: ld a7, 24(a0) +; ZVE32X-NEXT: ld a0, 32(a0) ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; ZVE32X-NEXT: vmv.s.x v8, zero ; ZVE32X-NEXT: vmv.v.i v9, 0 -; ZVE32X-NEXT: xor a3, a3, a4 -; ZVE32X-NEXT: xor a1, a1, a2 -; ZVE32X-NEXT: xor a2, a5, a6 ; ZVE32X-NEXT: xor a0, a7, a0 -; ZVE32X-NEXT: snez a3, a3 +; ZVE32X-NEXT: xor a5, a5, a6 +; ZVE32X-NEXT: xor a1, a1, a2 +; ZVE32X-NEXT: xor a3, a3, a4 +; ZVE32X-NEXT: snez a0, a0 +; ZVE32X-NEXT: snez a2, a5 ; ZVE32X-NEXT: snez a1, a1 -; ZVE32X-NEXT: vmv.s.x v10, a3 -; ZVE32X-NEXT: vmv.s.x v11, a1 +; ZVE32X-NEXT: snez a3, a3 +; ZVE32X-NEXT: vmv.s.x v10, a0 +; ZVE32X-NEXT: vmv.s.x v11, a2 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; ZVE32X-NEXT: vand.vi v10, v10, 1 +; ZVE32X-NEXT: vand.vi v11, v11, 1 ; ZVE32X-NEXT: vmsne.vi v0, v10, 0 -; ZVE32X-NEXT: vand.vi v10, v11, 1 -; ZVE32X-NEXT: vmerge.vim v11, v8, 1, v0 -; ZVE32X-NEXT: vmsne.vi v0, v10, 0 -; ZVE32X-NEXT: snez a1, a2 +; ZVE32X-NEXT: vmerge.vim v10, v8, 1, v0 +; ZVE32X-NEXT: vmsne.vi v0, v11, 0 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmerge.vim v10, v9, 1, v0 +; ZVE32X-NEXT: vmerge.vim v11, v9, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 2, e8, mf4, tu, ma -; ZVE32X-NEXT: vslideup.vi v10, v11, 1 -; ZVE32X-NEXT: vmv.s.x v11, a1 -; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmsne.vi v0, v10, 0 +; ZVE32X-NEXT: vslideup.vi v11, v10, 1 +; ZVE32X-NEXT: vmv.s.x v10, a1 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma -; ZVE32X-NEXT: vand.vi v10, v11, 1 +; ZVE32X-NEXT: vand.vi v10, v10, 1 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32X-NEXT: vmsne.vi v0, v11, 0 ; ZVE32X-NEXT: vmerge.vim v11, v9, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; ZVE32X-NEXT: vmsne.vi v0, v10, 0 -; ZVE32X-NEXT: snez a0, a0 ; ZVE32X-NEXT: vmerge.vim v10, v8, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 3, e8, mf4, tu, ma ; ZVE32X-NEXT: vslideup.vi v11, v10, 2 -; ZVE32X-NEXT: vmv.s.x v10, a0 -; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32X-NEXT: vmsne.vi v0, v11, 0 +; ZVE32X-NEXT: vmv.s.x v10, a3 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; ZVE32X-NEXT: vand.vi v10, v10, 1 ; ZVE32X-NEXT: vsetivli zero, 4, e8, mf4, ta, ma +; ZVE32X-NEXT: vmsne.vi v0, v11, 0 ; ZVE32X-NEXT: vmerge.vim v9, v9, 1, v0 ; ZVE32X-NEXT: vsetivli zero, 1, e8, mf4, ta, ma ; ZVE32X-NEXT: vmsne.vi v0, v10, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll index 1516c67bf7ecc..e1f834b263782 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-interleaved-access.ll @@ -183,10 +183,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 88 +; RV32-NEXT: li a3, 84 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: sub sp, sp, a2 -; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 88 * vlenb +; RV32-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd4, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 84 * vlenb ; RV32-NEXT: addi a3, a1, 256 ; RV32-NEXT: addi a4, a1, 128 ; RV32-NEXT: li a2, 32 @@ -194,79 +194,127 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: lui a6, %hi(.LCPI8_0) ; RV32-NEXT: addi a6, a6, 
%lo(.LCPI8_0) ; RV32-NEXT: li a7, 768 -; RV32-NEXT: lui t0, 49164 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: lui a1, 49164 +; RV32-NEXT: vle32.v v24, (a4) +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li t0, 60 +; RV32-NEXT: mul a4, a4, t0 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: lui a4, %hi(.LCPI8_1) +; RV32-NEXT: addi a4, a4, %lo(.LCPI8_1) +; RV32-NEXT: addi a5, a5, 3 +; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma +; RV32-NEXT: vle16.v v8, (a6) +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: li t0, 76 +; RV32-NEXT: mul a6, a6, t0 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs2r.v v8, (a6) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v8, a7 +; RV32-NEXT: csrr a6, vlenb +; RV32-NEXT: li a7, 36 +; RV32-NEXT: mul a6, a6, a7 +; RV32-NEXT: add a6, sp, a6 +; RV32-NEXT: addi a6, a6, 16 +; RV32-NEXT: vs1r.v v8, (a6) # Unknown-size Folded Spill +; RV32-NEXT: addi a1, a1, 12 +; RV32-NEXT: vle16.v v8, (a4) +; RV32-NEXT: csrr a4, vlenb +; RV32-NEXT: li a6, 28 +; RV32-NEXT: mul a4, a4, a6 +; RV32-NEXT: add a4, sp, a4 +; RV32-NEXT: addi a4, a4, 16 +; RV32-NEXT: vs2r.v v8, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vmv.s.x v0, a5 +; RV32-NEXT: vmv.s.x v8, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li t1, 72 -; RV32-NEXT: mul a1, a1, t1 +; RV32-NEXT: li a4, 24 +; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vle32.v v8, (a4) +; RV32-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vmv8r.v v8, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a4, 68 +; RV32-NEXT: mul a1, a1, a4 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a5, a5, 3 -; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vle16.v v6, (a6) -; RV32-NEXT: vmv.s.x v0, a5 -; RV32-NEXT: lui a1, %hi(.LCPI8_1) -; RV32-NEXT: addi a1, a1, %lo(.LCPI8_1) +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v16, v8, v16, v0 +; RV32-NEXT: vmerge.vvm v16, v24, v16, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a4, 76 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl2r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v24, v16, v6 -; RV32-NEXT: csrr a4, vlenb -; RV32-NEXT: li a5, 48 -; RV32-NEXT: mul a4, a4, a5 -; RV32-NEXT: add a4, sp, a4 -; RV32-NEXT: addi a4, a4, 16 -; RV32-NEXT: vs8r.v v24, (a4) # Unknown-size Folded Spill +; RV32-NEXT: vrgatherei16.vv v0, v16, v24 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a4, 44 +; RV32-NEXT: mul a1, a1, a4 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vle32.v v16, (a3) -; RV32-NEXT: addi t0, t0, 12 -; RV32-NEXT: vmv.s.x v0, a7 -; RV32-NEXT: vmv.s.x v7, t0 -; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v4, (a1) +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 76 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vsetivli zero, 16, e32, 
m8, ta, ma ; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 56 +; RV32-NEXT: li a3, 52 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 80 +; RV32-NEXT: li a3, 36 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v20, v24, v16, v0 +; RV32-NEXT: vmerge.vvm v16, v24, v16, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 36 +; RV32-NEXT: li a3, 20 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v7 +; RV32-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 72 +; RV32-NEXT: li a3, 60 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v24, v8, v16, v0 +; RV32-NEXT: vmerge.vvm v24, v16, v8, v0 +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 28 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl2r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v8, v24, v4 +; RV32-NEXT: vrgatherei16.vv v8, v24, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 40 +; RV32-NEXT: li a3, 36 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 @@ -278,23 +326,22 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: slli a1, a1, 10 ; RV32-NEXT: addi a3, a3, 48 ; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vle16.v v14, (a4) ; RV32-NEXT: vmv.s.x v12, a3 +; RV32-NEXT: vle16.v v14, (a4) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 80 +; RV32-NEXT: li a3, 76 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv4r.v v8, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 56 +; RV32-NEXT: li a3, 52 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v8, v24, v8, v0 +; RV32-NEXT: vmerge.vvm v8, v16, v24, v0 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a3, 24 ; RV32-NEXT: mul a1, a1, a3 @@ -303,7 +350,14 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a3, 68 +; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: csrr a1, vlenb +; RV32-NEXT: li a3, 60 +; RV32-NEXT: mul a1, 
a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload @@ -323,326 +377,312 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV32-NEXT: lui a5, 768 ; RV32-NEXT: li a6, 48 ; RV32-NEXT: lui a7, 3073 -; RV32-NEXT: li t0, 192 ; RV32-NEXT: addi a1, a1, 3 ; RV32-NEXT: addi a3, a3, 192 ; RV32-NEXT: addi a4, a4, 12 ; RV32-NEXT: addi a5, a5, 768 ; RV32-NEXT: addi a7, a7, -1024 -; RV32-NEXT: vmv.s.x v13, a6 -; RV32-NEXT: vmv.s.x v2, t0 +; RV32-NEXT: vmv.s.x v2, a6 ; RV32-NEXT: vmv.s.x v0, a1 -; RV32-NEXT: vmv.s.x v12, a3 -; RV32-NEXT: vmv.s.x v3, a4 -; RV32-NEXT: vmv.s.x v14, a5 -; RV32-NEXT: vmv.s.x v1, a7 +; RV32-NEXT: vmv.s.x v8, a3 +; RV32-NEXT: vmv.s.x v20, a4 +; RV32-NEXT: vmv.s.x v1, a5 +; RV32-NEXT: vmv.s.x v3, a7 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 56 +; RV32-NEXT: li a3, 52 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vmv4r.v v8, v16 +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vmv4r.v v16, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 80 +; RV32-NEXT: li a3, 76 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v20, v8, v16, v0 -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vs4r.v v20, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v12 +; RV32-NEXT: vmerge.vvm v4, v16, v24, v0 +; RV32-NEXT: vmv1r.v v0, v8 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a3, 60 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 72 +; RV32-NEXT: li a3, 68 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v24, v16, v24, v0 -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 12 -; RV32-NEXT: mul a1, a1, a3 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 +; RV32-NEXT: vmerge.vvm v24, v8, v24, v0 +; RV32-NEXT: addi a1, sp, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v3 +; RV32-NEXT: vmv1r.v v0, v20 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 80 +; RV32-NEXT: li a3, 76 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v4, v8, v24, v0 -; RV32-NEXT: vmv1r.v v0, v14 +; RV32-NEXT: vmerge.vvm v20, v16, v24, v0 +; RV32-NEXT: vmv1r.v v0, v1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 72 +; RV32-NEXT: li a3, 68 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v24, v16, v24, v0 +; RV32-NEXT: vmerge.vvm v24, v8, v24, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: li a3, 12 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, 
a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v13 +; RV32-NEXT: vmv1r.v v0, v2 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 80 +; RV32-NEXT: li a3, 76 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v12, v8, v24, v0 +; RV32-NEXT: vmerge.vvm v12, v16, v8, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 20 -; RV32-NEXT: mul a1, a1, a3 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v1 +; RV32-NEXT: vmv1r.v v0, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a3, 72 +; RV32-NEXT: li a3, 68 ; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a3, 60 +; RV32-NEXT: mul a1, a1, a3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vmerge.vvm v16, v24, v16, v0 +; RV32-NEXT: vmerge.vvm v24, v8, v24, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 72 +; RV32-NEXT: li a2, 68 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: vmv1r.v v0, v2 +; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: li a1, 192 +; RV32-NEXT: vmv.s.x v0, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 80 +; RV32-NEXT: li a2, 76 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vmerge.vvm v8, v8, v16, v0 +; RV32-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a2, 60 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 32 ; RV32-NEXT: addi a1, a1, 4 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v12, a1 +; RV32-NEXT: vmv.v.x v16, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 36 +; RV32-NEXT: li a2, 20 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v16, v8, v12 +; RV32-NEXT: vrgatherei16.vv v8, v12, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 44 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v16, v8 +; RV32-NEXT: vmv.v.v v8, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 80 +; 
RV32-NEXT: li a2, 76 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, 48 -; RV32-NEXT: lui a2, %hi(.LCPI8_3) -; RV32-NEXT: addi a2, a2, %lo(.LCPI8_3) ; RV32-NEXT: addi a1, a1, 5 -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v24, (a2) ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v25, a1 +; RV32-NEXT: vmv.v.x v3, a1 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 24 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v16, v8, v25 +; RV32-NEXT: vrgatherei16.vv v8, v12, v3 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 +; RV32-NEXT: li a2, 36 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 12, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v16, v8 +; RV32-NEXT: vmv.v.v v8, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 56 +; RV32-NEXT: li a2, 36 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill -; RV32-NEXT: addi a1, sp, 16 -; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: lui a1, %hi(.LCPI8_3) +; RV32-NEXT: addi a1, a1, %lo(.LCPI8_3) ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v16, v8, v24 +; RV32-NEXT: vle16.v v24, (a1) +; RV32-NEXT: vrgatherei16.vv v12, v4, v24 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: li a2, 28 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v16, v8 +; RV32-NEXT: vmv.v.v v12, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 40 +; RV32-NEXT: li a2, 52 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill ; RV32-NEXT: lui a1, %hi(.LCPI8_4) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_4) -; RV32-NEXT: lui a2, %hi(.LCPI8_5) -; RV32-NEXT: addi a2, a2, %lo(.LCPI8_5) -; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v26, (a1) -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v24, (a2) -; RV32-NEXT: lui a1, %hi(.LCPI8_6) -; RV32-NEXT: addi a1, a1, %lo(.LCPI8_6) ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle16.v v2, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 12 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v8, v16, v26 +; RV32-NEXT: vle16.v v12, (a1) +; RV32-NEXT: addi a1, sp, 16 +; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v0, v12 +; RV32-NEXT: lui a1, %hi(.LCPI8_5) +; RV32-NEXT: addi a1, a1, %lo(.LCPI8_5) ; RV32-NEXT: vsetivli zero, 8, e64, 
m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v20, v4, v24 +; RV32-NEXT: vle16.v v28, (a1) +; RV32-NEXT: vrgatherei16.vv v8, v20, v28 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v20, v8 +; RV32-NEXT: vmv.v.v v8, v24 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 2 +; RV32-NEXT: li a2, 28 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: lui a1, %hi(.LCPI8_6) +; RV32-NEXT: addi a1, a1, %lo(.LCPI8_6) ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vrgatherei16.vv v24, v8, v2 +; RV32-NEXT: vle16.v v24, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 12 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v0, v16, v24 ; RV32-NEXT: lui a1, %hi(.LCPI8_7) ; RV32-NEXT: addi a1, a1, %lo(.LCPI8_7) -; RV32-NEXT: lui a2, %hi(.LCPI8_8) -; RV32-NEXT: addi a2, a2, %lo(.LCPI8_8) -; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; RV32-NEXT: vle16.v v12, (a1) -; RV32-NEXT: lui a1, %hi(.LCPI8_9) -; RV32-NEXT: addi a1, a1, %lo(.LCPI8_9) -; RV32-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; RV32-NEXT: vle16.v v16, (a2) ; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vle16.v v18, (a1) -; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 20 -; RV32-NEXT: mul a1, a1, a2 -; RV32-NEXT: add a1, sp, a1 -; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vrgatherei16.vv v24, v0, v12 +; RV32-NEXT: vle16.v v20, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 -; RV32-NEXT: mul a1, a1, a2 +; RV32-NEXT: slli a1, a1, 3 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v24, v16, v20 ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma ; RV32-NEXT: vmv.v.v v24, v0 +; RV32-NEXT: lui a1, %hi(.LCPI8_8) +; RV32-NEXT: addi a1, a1, %lo(.LCPI8_8) +; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV32-NEXT: vle16.v v16, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 72 +; RV32-NEXT: li a2, 68 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vrgatherei16.vv v8, v0, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 44 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV32-NEXT: lui a1, %hi(.LCPI8_9) +; RV32-NEXT: addi a1, a1, %lo(.LCPI8_9) +; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma +; RV32-NEXT: vle16.v v16, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: slli a1, a1, 6 +; RV32-NEXT: li a2, 60 +; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl4r.v v4, (a1) # Unknown-size Folded Reload -; RV32-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-NEXT: vrgatherei16.vv v8, v4, v18 +; RV32-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vrgatherei16.vv v20, v28, v16 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 48 +; RV32-NEXT: li a2, 44 ; RV32-NEXT: mul a1, 
a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 -; RV32-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV32-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vsetivli zero, 10, e32, m4, tu, ma -; RV32-NEXT: vmv.v.v v8, v0 +; RV32-NEXT: vmv.v.v v20, v8 ; RV32-NEXT: addi a1, a0, 320 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: vse32.v v20, (a1) ; RV32-NEXT: addi a1, a0, 256 ; RV32-NEXT: vse32.v v24, (a1) ; RV32-NEXT: addi a1, a0, 192 -; RV32-NEXT: vse32.v v20, (a1) -; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 40 +; RV32-NEXT: li a3, 28 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) +; RV32-NEXT: addi a1, a0, 128 +; RV32-NEXT: csrr a2, vlenb +; RV32-NEXT: li a3, 52 +; RV32-NEXT: mul a2, a2, a3 +; RV32-NEXT: add a2, sp, a2 +; RV32-NEXT: addi a2, a2, 16 +; RV32-NEXT: vl4r.v v12, (a2) # Unknown-size Folded Reload +; RV32-NEXT: vse32.v v12, (a1) ; RV32-NEXT: addi a1, a0, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: li a3, 56 +; RV32-NEXT: li a3, 36 ; RV32-NEXT: mul a2, a2, a3 ; RV32-NEXT: add a2, sp, a2 ; RV32-NEXT: addi a2, a2, 16 ; RV32-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: li a2, 80 +; RV32-NEXT: li a2, 76 ; RV32-NEXT: mul a1, a1, a2 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 16 ; RV32-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV32-NEXT: vse32.v v8, (a0) ; RV32-NEXT: csrr a0, vlenb -; RV32-NEXT: li a1, 88 +; RV32-NEXT: li a1, 84 ; RV32-NEXT: mul a0, a0, a1 ; RV32-NEXT: add sp, sp, a0 ; RV32-NEXT: .cfi_def_cfa sp, 16 @@ -659,463 +699,419 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: mul a2, a2, a3 ; RV64-NEXT: sub sp, sp, a2 ; RV64-NEXT: .cfi_escape 0x0f, 0x0e, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0xd8, 0x00, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 88 * vlenb -; RV64-NEXT: addi a3, a1, 128 -; RV64-NEXT: addi a6, a1, 256 -; RV64-NEXT: li a4, 128 -; RV64-NEXT: lui a2, 1 -; RV64-NEXT: lui a5, %hi(.LCPI8_0) -; RV64-NEXT: addi a5, a5, %lo(.LCPI8_0) +; RV64-NEXT: addi a3, a1, 256 +; RV64-NEXT: li a2, 128 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vmv.v.i v16, 6 +; RV64-NEXT: lui a4, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v8, (a6) -; RV64-NEXT: lui a6, 16 -; RV64-NEXT: addi a6, a6, 7 +; RV64-NEXT: vle64.v v8, (a3) +; RV64-NEXT: addi a4, a4, 7 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v17, a6 -; RV64-NEXT: addi a6, a2, 65 +; RV64-NEXT: vmv.v.x v17, a4 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-NEXT: vrgather.vi v4, v8, 4 +; RV64-NEXT: vrgather.vi v24, v8, 4 ; RV64-NEXT: vrgather.vi v20, v8, 5 -; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: li t0, 84 -; RV64-NEXT: mul a7, a7, t0 -; RV64-NEXT: add a7, sp, a7 -; RV64-NEXT: addi a7, a7, 16 -; RV64-NEXT: vs4r.v v20, (a7) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 76 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs4r.v v20, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vrgatherei16.vv v20, v8, v16 -; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: slli a7, a7, 6 -; RV64-NEXT: add a7, sp, a7 -; RV64-NEXT: addi a7, a7, 16 -; RV64-NEXT: vs4r.v v20, (a7) # Unknown-size Folded Spill +; RV64-NEXT: 
csrr a3, vlenb +; RV64-NEXT: li a4, 84 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs4r.v v20, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vrgatherei16.vv v20, v8, v17 -; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: li t0, 56 -; RV64-NEXT: mul a7, a7, t0 -; RV64-NEXT: add a7, sp, a7 -; RV64-NEXT: addi a7, a7, 16 -; RV64-NEXT: vs4r.v v20, (a7) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 80 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs4r.v v20, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vrgather.vi v16, v8, 2 -; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: li t0, 72 -; RV64-NEXT: mul a7, a7, t0 -; RV64-NEXT: add a7, sp, a7 -; RV64-NEXT: addi a7, a7, 16 -; RV64-NEXT: vs4r.v v16, (a7) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 72 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs4r.v v16, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vrgather.vi v16, v8, 3 -; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: li t0, 68 -; RV64-NEXT: mul a7, a7, t0 -; RV64-NEXT: add a7, sp, a7 -; RV64-NEXT: addi a7, a7, 16 -; RV64-NEXT: vs4r.v v16, (a7) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: slli a3, a3, 6 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs4r.v v16, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vsetivli zero, 8, e64, m8, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 8 -; RV64-NEXT: csrr a7, vlenb -; RV64-NEXT: li t0, 40 -; RV64-NEXT: mul a7, a7, t0 -; RV64-NEXT: add a7, sp, a7 -; RV64-NEXT: addi a7, a7, 16 -; RV64-NEXT: vs8r.v v8, (a7) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v0, a4 -; RV64-NEXT: csrr a4, vlenb -; RV64-NEXT: slli a4, a4, 5 -; RV64-NEXT: add a4, sp, a4 -; RV64-NEXT: addi a4, a4, 16 -; RV64-NEXT: vs1r.v v0, (a4) # Unknown-size Folded Spill +; RV64-NEXT: csrr a3, vlenb +; RV64-NEXT: li a4, 48 +; RV64-NEXT: mul a3, a3, a4 +; RV64-NEXT: add a3, sp, a3 +; RV64-NEXT: addi a3, a3, 16 +; RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; RV64-NEXT: vmv.s.x v7, a2 +; RV64-NEXT: vmv1r.v v0, v7 +; RV64-NEXT: addi a2, sp, 16 +; RV64-NEXT: vs1r.v v7, (a2) # Unknown-size Folded Spill +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgather.vi v24, v8, 2, v0.t +; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: li a3, 68 +; RV64-NEXT: mul a2, a2, a3 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: addi a2, a2, 16 +; RV64-NEXT: vs4r.v v24, (a2) # Unknown-size Folded Spill +; RV64-NEXT: addi a3, a1, 128 +; RV64-NEXT: lui a2, 1 +; RV64-NEXT: lui a4, %hi(.LCPI8_0) +; RV64-NEXT: addi a4, a4, %lo(.LCPI8_0) +; RV64-NEXT: addi a5, a2, 65 +; RV64-NEXT: vmv.s.x v0, a5 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle16.v v2, (a4) +; RV64-NEXT: vle64.v v8, (a1) +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a4, 56 +; RV64-NEXT: mul a1, a1, a4 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vle64.v v16, (a3) +; RV64-NEXT: vmerge.vvm v24, v16, v8, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 76 +; RV64-NEXT: li a3, 40 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vle16.v v12, (a5) +; RV64-NEXT: vrgatherei16.vv v8, v24, v2 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 
+; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs2r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v2, a6 -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vi v4, v8, 2, v0.t +; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 60 +; RV64-NEXT: li a3, 76 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v4, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v2 -; RV64-NEXT: vmv8r.v v8, v24 +; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 48 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vmerge.vvm v24, v16, v24, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl2r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v0, v24, v16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu +; RV64-NEXT: vrgather.vi v24, v8, 3, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 24 +; RV64-NEXT: li a3, 76 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 2 ; RV64-NEXT: lui a3, %hi(.LCPI8_1) ; RV64-NEXT: addi a3, a3, %lo(.LCPI8_1) ; RV64-NEXT: addi a1, a1, 130 -; RV64-NEXT: vle16.v v16, (a3) -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 -; RV64-NEXT: add a3, sp, a3 -; RV64-NEXT: addi a3, a3, 16 -; RV64-NEXT: vs2r.v v16, (a3) # Unknown-size Folded Spill -; RV64-NEXT: vmv.s.x v2, a1 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl1r.v v0, (a1) # Unknown-size Folded Reload -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 84 -; RV64-NEXT: mul a1, a1, a3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 40 -; RV64-NEXT: mul a1, a1, a3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vi v24, v16, 3, v0.t +; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle16.v v8, (a3) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 84 -; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v2 +; RV64-NEXT: vs2r.v v8, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 76 +; RV64-NEXT: li a3, 56 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmerge.vvm v24, v16, v8, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add 
a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vrgatherei16.vv v0, v24, v8 +; RV64-NEXT: vl2r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v0, v24, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a3, 24 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 4 -; RV64-NEXT: lui a3, 8 ; RV64-NEXT: addi a1, a1, 260 -; RV64-NEXT: addi a3, a3, 520 ; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: vmv.s.x v2, a3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 48 +; RV64-NEXT: li a3, 40 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v8, v16, v24, v0 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmerge.vvm v24, v16, v8, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl1r.v v7, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: li a3, 84 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 40 +; RV64-NEXT: li a3, 48 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv4r.v v8, v16 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgather.vi v12, v16, 4, v0.t +; RV64-NEXT: vrgather.vi v24, v16, 4, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: li a3, 84 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: vmv1r.v v0, v2 +; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: lui a1, 8 +; RV64-NEXT: addi a1, a1, 520 +; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 76 +; RV64-NEXT: li a3, 40 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vmerge.vvm v16, v16, v24, v0 +; RV64-NEXT: vmerge.vvm v24, v16, v8, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v7 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 56 +; RV64-NEXT: li a3, 80 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: li a3, 48 +; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: add a1, sp, a1 +; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: 
vsetivli zero, 8, e64, m4, ta, mu ; RV64-NEXT: vrgather.vi v24, v8, 5, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 56 +; RV64-NEXT: li a3, 80 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: lui a1, 96 ; RV64-NEXT: li a3, 192 -; RV64-NEXT: vmv.s.x v3, a3 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v24, a1 -; RV64-NEXT: vmv1r.v v0, v3 +; RV64-NEXT: vmv.v.x v2, a1 +; RV64-NEXT: vmv.s.x v3, a3 +; RV64-NEXT: vmv.v.v v0, v3 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 72 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgatherei16.vv v28, v8, v24, v0.t -; RV64-NEXT: vmv4r.v v16, v8 +; RV64-NEXT: vrgatherei16.vv v24, v8, v2, v0.t ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a3, 72 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill -; RV64-NEXT: lui a1, %hi(.LCPI8_2) -; RV64-NEXT: addi a1, a1, %lo(.LCPI8_2) -; RV64-NEXT: li a3, 1040 -; RV64-NEXT: lui a4, 112 -; RV64-NEXT: addi a4, a4, 1 -; RV64-NEXT: vmv.s.x v0, a3 +; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: li a1, 1040 +; RV64-NEXT: lui a3, 112 +; RV64-NEXT: addi a3, a3, 1 +; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v5, a4 -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vle16.v v6, (a1) +; RV64-NEXT: vmv.v.x v12, a3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 76 -; RV64-NEXT: mul a1, a1, a3 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 48 +; RV64-NEXT: li a3, 56 ; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmerge.vvm v24, v8, v24, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vmerge.vvm v24, v16, v24, v0 +; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vmv1r.v v0, v3 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 68 -; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: slli a1, a1, 6 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, mu -; RV64-NEXT: vrgatherei16.vv v28, v16, v5, v0.t +; RV64-NEXT: vrgatherei16.vv v24, v8, v12, v0.t ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a3, 68 -; RV64-NEXT: mul a1, a1, a3 +; RV64-NEXT: slli a1, a1, 6 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v28, (a1) # Unknown-size Folded Spill -; RV64-NEXT: addi a1, a2, -2016 -; RV64-NEXT: vmv.s.x v0, a1 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vs4r.v v24, (a1) # Unknown-size Folded Spill +; RV64-NEXT: lui a1, %hi(.LCPI8_2) +; RV64-NEXT: addi a1, a1, %lo(.LCPI8_2) ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v16, v24, 
v6 +; RV64-NEXT: vle16.v v24, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v0, v8, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 48 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: li a3, 48 +; RV64-NEXT: mul a1, a1, a3 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vs8r.v v0, (a1) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, a2, -2016 +; RV64-NEXT: vmv.s.x v0, a1 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 56 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 56 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; RV64-NEXT: lui a1, %hi(.LCPI8_3) -; RV64-NEXT: addi a1, a1, %lo(.LCPI8_3) -; RV64-NEXT: vle16.v v8, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 +; RV64-NEXT: li a2, 68 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 24 -; RV64-NEXT: mul a1, a1, a2 +; RV64-NEXT: slli a1, a1, 5 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 6, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v12, v0 +; RV64-NEXT: vmv.v.v v16, v8 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 +; RV64-NEXT: li a2, 68 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vs4r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 84 +; RV64-NEXT: li a2, 76 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v20, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 4 +; RV64-NEXT: li a2, 24 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vmv.v.v v12, v24 +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v20, v8 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 84 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs4r.v v12, (a1) # Unknown-size Folded Spill -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 40 +; RV64-NEXT: li a2, 48 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; 
RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v16, v24 +; RV64-NEXT: vmv.v.v v8, v24 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 3 +; RV64-NEXT: li a2, 84 +; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vs4r.v v8, (a1) # Unknown-size Folded Spill +; RV64-NEXT: lui a1, %hi(.LCPI8_3) +; RV64-NEXT: addi a1, a1, %lo(.LCPI8_3) ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v0, v24, v8 -; RV64-NEXT: lui a1, %hi(.LCPI8_4) -; RV64-NEXT: addi a1, a1, %lo(.LCPI8_4) -; RV64-NEXT: vle16.v v8, (a1) -; RV64-NEXT: lui a1, %hi(.LCPI8_5) -; RV64-NEXT: addi a1, a1, %lo(.LCPI8_5) -; RV64-NEXT: vle16.v v10, (a1) +; RV64-NEXT: vle16.v v16, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 6 +; RV64-NEXT: slli a1, a1, 4 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vs2r.v v10, (a1) # Unknown-size Folded Spill +; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v24, v8, v16 ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 56 +; RV64-NEXT: li a2, 80 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v12, v0 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: slli a1, a1, 5 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vmv.v.v v8, v24 +; RV64-NEXT: lui a1, %hi(.LCPI8_4) +; RV64-NEXT: addi a1, a1, %lo(.LCPI8_4) ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v24, v0, v8 +; RV64-NEXT: vle16.v v16, (a1) +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v24, v0, v16 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: li a2, 72 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v8, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v12, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v8, v24 +; RV64-NEXT: vmv.v.v v12, v24 +; RV64-NEXT: lui a1, %hi(.LCPI8_5) +; RV64-NEXT: addi a1, a1, %lo(.LCPI8_5) +; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; RV64-NEXT: vle16.v v16, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 76 +; RV64-NEXT: li a2, 56 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 ; RV64-NEXT: vl8r.v v0, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vrgatherei16.vv v24, v0, v16 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 6 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl2r.v v20, (a1) # Unknown-size Folded Reload -; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vrgatherei16.vv v24, v0, v20 -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 68 -; RV64-NEXT: mul a1, a1, a2 -; RV64-NEXT: add a1, sp, a1 -; RV64-NEXT: addi a1, a1, 16 -; RV64-NEXT: vl4r.v v28, (a1) # Unknown-size Folded Reload +; RV64-NEXT: vl4r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetivli zero, 5, e64, m4, tu, ma -; RV64-NEXT: vmv.v.v v28, v24 +; RV64-NEXT: vmv.v.v v16, v24 ; RV64-NEXT: addi a1, a0, 256 ; RV64-NEXT: vsetivli zero, 8, e64, m4, 
ta, ma -; RV64-NEXT: vse64.v v8, (a1) +; RV64-NEXT: vse64.v v12, (a1) ; RV64-NEXT: addi a1, a0, 320 -; RV64-NEXT: vse64.v v28, (a1) +; RV64-NEXT: vse64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 192 -; RV64-NEXT: vse64.v v12, (a1) +; RV64-NEXT: vse64.v v8, (a1) ; RV64-NEXT: addi a1, a0, 128 -; RV64-NEXT: vse64.v v16, (a1) -; RV64-NEXT: addi a1, a0, 64 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: li a3, 84 ; RV64-NEXT: mul a2, a2, a3 @@ -1123,8 +1119,10 @@ define {<8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>, <8 x i64>} @load_ ; RV64-NEXT: addi a2, a2, 16 ; RV64-NEXT: vl4r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vse64.v v8, (a1) +; RV64-NEXT: addi a1, a0, 64 +; RV64-NEXT: vse64.v v20, (a1) ; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: li a2, 60 +; RV64-NEXT: li a2, 68 ; RV64-NEXT: mul a1, a1, a2 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll index f27614c93985f..118408d40c669 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll @@ -612,50 +612,51 @@ define <16 x i64> @llrint_v16i64_v16f32(<16 x float> %x) { ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -128 ; RV64-NEXT: addi a0, sp, 64 +; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa5, v8 +; RV64-NEXT: vslidedown.vi v12, v8, 3 +; RV64-NEXT: vslidedown.vi v13, v8, 2 +; RV64-NEXT: vslidedown.vi v14, v8, 1 +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v16, v8, 7 +; RV64-NEXT: vslidedown.vi v18, v8, 6 +; RV64-NEXT: vslidedown.vi v20, v8, 5 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vse32.v v8, (a0) -; RV64-NEXT: flw fa5, 124(sp) -; RV64-NEXT: vfmv.f.s fa4, v8 -; RV64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 3 -; RV64-NEXT: vslidedown.vi v11, v8, 2 +; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-NEXT: vslidedown.vi v8, v8, 4 ; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: sd a0, 248(sp) -; RV64-NEXT: flw fa5, 120(sp) -; RV64-NEXT: vslidedown.vi v12, v8, 1 -; RV64-NEXT: fcvt.l.s a0, fa4 -; RV64-NEXT: vfmv.f.s fa4, v10 +; RV64-NEXT: vfmv.f.s fa5, v12 ; RV64-NEXT: fcvt.l.s a1, fa5 -; RV64-NEXT: sd a1, 240(sp) -; RV64-NEXT: flw fa5, 116(sp) -; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV64-NEXT: vslidedown.vi v14, v8, 7 -; RV64-NEXT: fcvt.l.s a1, fa4 -; RV64-NEXT: vfmv.f.s fa4, v11 +; RV64-NEXT: vfmv.f.s fa5, v13 ; RV64-NEXT: fcvt.l.s a2, fa5 -; RV64-NEXT: sd a2, 232(sp) -; RV64-NEXT: flw fa5, 112(sp) -; RV64-NEXT: fcvt.l.s a2, fa4 -; RV64-NEXT: vfmv.f.s fa4, v12 -; RV64-NEXT: vslidedown.vi v10, v8, 6 +; RV64-NEXT: vfmv.f.s fa5, v14 ; RV64-NEXT: fcvt.l.s a3, fa5 -; RV64-NEXT: sd a3, 224(sp) -; RV64-NEXT: flw fa5, 108(sp) -; RV64-NEXT: fcvt.l.s a3, fa4 -; RV64-NEXT: vfmv.f.s fa4, v14 -; RV64-NEXT: vslidedown.vi v12, v8, 5 +; RV64-NEXT: vfmv.f.s fa5, v16 ; RV64-NEXT: fcvt.l.s a4, fa5 -; RV64-NEXT: sd a4, 216(sp) -; RV64-NEXT: flw fa5, 104(sp) -; RV64-NEXT: fcvt.l.s a4, fa4 -; RV64-NEXT: vfmv.f.s fa4, v10 -; RV64-NEXT: fcvt.l.s a5, fa4 +; RV64-NEXT: vfmv.f.s fa5, v18 +; RV64-NEXT: fcvt.l.s a5, fa5 +; RV64-NEXT: vfmv.f.s fa5, v20 ; RV64-NEXT: fcvt.l.s a6, fa5 -; RV64-NEXT: sd a6, 208(sp) +; RV64-NEXT: flw fa5, 124(sp) +; RV64-NEXT: fcvt.l.s a7, fa5 +; RV64-NEXT: sd a7, 248(sp) +; RV64-NEXT: flw fa5, 120(sp) +; RV64-NEXT: fcvt.l.s a7, fa5 +; RV64-NEXT: sd a7, 240(sp) +; RV64-NEXT: flw fa5, 116(sp) +; RV64-NEXT: fcvt.l.s 
a7, fa5 +; RV64-NEXT: sd a7, 232(sp) +; RV64-NEXT: flw fa5, 112(sp) +; RV64-NEXT: fcvt.l.s a7, fa5 +; RV64-NEXT: sd a7, 224(sp) +; RV64-NEXT: flw fa5, 108(sp) +; RV64-NEXT: fcvt.l.s a7, fa5 +; RV64-NEXT: sd a7, 216(sp) +; RV64-NEXT: flw fa5, 104(sp) +; RV64-NEXT: fcvt.l.s a7, fa5 +; RV64-NEXT: sd a7, 208(sp) ; RV64-NEXT: flw fa5, 100(sp) -; RV64-NEXT: vfmv.f.s fa4, v12 -; RV64-NEXT: fcvt.l.s a6, fa4 -; RV64-NEXT: vslidedown.vi v8, v8, 4 ; RV64-NEXT: fcvt.l.s a7, fa5 ; RV64-NEXT: vfmv.f.s fa5, v8 ; RV64-NEXT: sd a7, 200(sp) @@ -981,26 +982,27 @@ define <8 x i64> @llrint_v8i64_v8f64(<8 x double> %x) { ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: mv a0, sp +; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vfmv.f.s fa5, v8 +; RV64-NEXT: vslidedown.vi v12, v8, 1 +; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV64-NEXT: vslidedown.vi v14, v8, 3 ; RV64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-NEXT: vse64.v v8, (a0) -; RV64-NEXT: fld fa5, 56(sp) -; RV64-NEXT: vfmv.f.s fa4, v8 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 1 -; RV64-NEXT: fcvt.l.d a0, fa4 -; RV64-NEXT: fcvt.l.d a1, fa5 -; RV64-NEXT: sd a1, 120(sp) -; RV64-NEXT: fld fa5, 48(sp) -; RV64-NEXT: vfmv.f.s fa4, v10 ; RV64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-NEXT: vslidedown.vi v10, v8, 3 -; RV64-NEXT: fcvt.l.d a1, fa4 +; RV64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-NEXT: fcvt.l.d a0, fa5 +; RV64-NEXT: vfmv.f.s fa5, v12 +; RV64-NEXT: fcvt.l.d a1, fa5 +; RV64-NEXT: vfmv.f.s fa5, v14 ; RV64-NEXT: fcvt.l.d a2, fa5 -; RV64-NEXT: sd a2, 112(sp) +; RV64-NEXT: fld fa5, 56(sp) +; RV64-NEXT: fcvt.l.d a3, fa5 +; RV64-NEXT: sd a3, 120(sp) +; RV64-NEXT: fld fa5, 48(sp) +; RV64-NEXT: fcvt.l.d a3, fa5 +; RV64-NEXT: sd a3, 112(sp) ; RV64-NEXT: fld fa5, 40(sp) -; RV64-NEXT: vfmv.f.s fa4, v10 -; RV64-NEXT: fcvt.l.d a2, fa4 -; RV64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-NEXT: fcvt.l.d a3, fa5 ; RV64-NEXT: vfmv.f.s fa5, v8 ; RV64-NEXT: sd a3, 104(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll index 2f58e3dd2769f..23ecc74880c6a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll @@ -363,50 +363,51 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { ; RV32-NEXT: .cfi_def_cfa s0, 0 ; RV32-NEXT: andi sp, sp, -64 ; RV32-NEXT: mv a0, sp +; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV32-NEXT: vfmv.f.s fa5, v8 +; RV32-NEXT: vslidedown.vi v12, v8, 3 +; RV32-NEXT: vslidedown.vi v13, v8, 2 +; RV32-NEXT: vslidedown.vi v14, v8, 1 +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v16, v8, 7 +; RV32-NEXT: vslidedown.vi v18, v8, 6 +; RV32-NEXT: vslidedown.vi v20, v8, 5 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vse32.v v8, (a0) -; RV32-NEXT: flw fa5, 60(sp) -; RV32-NEXT: vfmv.f.s fa4, v8 -; RV32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 3 -; RV32-NEXT: vslidedown.vi v11, v8, 2 +; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 4 ; RV32-NEXT: fcvt.w.s a0, fa5 -; RV32-NEXT: sw a0, 124(sp) -; RV32-NEXT: flw fa5, 56(sp) -; RV32-NEXT: fcvt.w.s a0, fa4 -; RV32-NEXT: vfmv.f.s fa4, v10 -; RV32-NEXT: vslidedown.vi v10, v8, 1 +; RV32-NEXT: vfmv.f.s fa5, v12 ; RV32-NEXT: fcvt.w.s a1, fa5 -; RV32-NEXT: sw a1, 120(sp) -; RV32-NEXT: flw fa5, 52(sp) -; RV32-NEXT: fcvt.w.s a1, fa4 -; RV32-NEXT: vfmv.f.s fa4, v11 -; RV32-NEXT: 
fcvt.w.s a2, fa4 +; RV32-NEXT: vfmv.f.s fa5, v13 +; RV32-NEXT: fcvt.w.s a2, fa5 +; RV32-NEXT: vfmv.f.s fa5, v14 ; RV32-NEXT: fcvt.w.s a3, fa5 -; RV32-NEXT: sw a3, 116(sp) -; RV32-NEXT: flw fa5, 48(sp) -; RV32-NEXT: vfmv.f.s fa4, v10 -; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV32-NEXT: vslidedown.vi v10, v8, 7 -; RV32-NEXT: fcvt.w.s a3, fa4 +; RV32-NEXT: vfmv.f.s fa5, v16 ; RV32-NEXT: fcvt.w.s a4, fa5 -; RV32-NEXT: sw a4, 112(sp) -; RV32-NEXT: flw fa5, 44(sp) -; RV32-NEXT: vfmv.f.s fa4, v10 -; RV32-NEXT: vslidedown.vi v10, v8, 6 -; RV32-NEXT: fcvt.w.s a4, fa4 +; RV32-NEXT: vfmv.f.s fa5, v18 ; RV32-NEXT: fcvt.w.s a5, fa5 -; RV32-NEXT: sw a5, 108(sp) -; RV32-NEXT: flw fa5, 40(sp) -; RV32-NEXT: vfmv.f.s fa4, v10 -; RV32-NEXT: vslidedown.vi v10, v8, 5 -; RV32-NEXT: fcvt.w.s a5, fa4 +; RV32-NEXT: vfmv.f.s fa5, v20 ; RV32-NEXT: fcvt.w.s a6, fa5 -; RV32-NEXT: sw a6, 104(sp) +; RV32-NEXT: flw fa5, 60(sp) +; RV32-NEXT: fcvt.w.s a7, fa5 +; RV32-NEXT: sw a7, 124(sp) +; RV32-NEXT: flw fa5, 56(sp) +; RV32-NEXT: fcvt.w.s a7, fa5 +; RV32-NEXT: sw a7, 120(sp) +; RV32-NEXT: flw fa5, 52(sp) +; RV32-NEXT: fcvt.w.s a7, fa5 +; RV32-NEXT: sw a7, 116(sp) +; RV32-NEXT: flw fa5, 48(sp) +; RV32-NEXT: fcvt.w.s a7, fa5 +; RV32-NEXT: sw a7, 112(sp) +; RV32-NEXT: flw fa5, 44(sp) +; RV32-NEXT: fcvt.w.s a7, fa5 +; RV32-NEXT: sw a7, 108(sp) +; RV32-NEXT: flw fa5, 40(sp) +; RV32-NEXT: fcvt.w.s a7, fa5 +; RV32-NEXT: sw a7, 104(sp) ; RV32-NEXT: flw fa5, 36(sp) -; RV32-NEXT: vfmv.f.s fa4, v10 -; RV32-NEXT: fcvt.w.s a6, fa4 -; RV32-NEXT: vslidedown.vi v8, v8, 4 ; RV32-NEXT: fcvt.w.s a7, fa5 ; RV32-NEXT: vfmv.f.s fa5, v8 ; RV32-NEXT: sw a7, 100(sp) @@ -447,50 +448,51 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { ; RV64-i32-NEXT: .cfi_def_cfa s0, 0 ; RV64-i32-NEXT: andi sp, sp, -64 ; RV64-i32-NEXT: mv a0, sp +; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-i32-NEXT: vfmv.f.s fa5, v8 +; RV64-i32-NEXT: vslidedown.vi v12, v8, 3 +; RV64-i32-NEXT: vslidedown.vi v13, v8, 2 +; RV64-i32-NEXT: vslidedown.vi v14, v8, 1 +; RV64-i32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-i32-NEXT: vslidedown.vi v16, v8, 7 +; RV64-i32-NEXT: vslidedown.vi v18, v8, 6 +; RV64-i32-NEXT: vslidedown.vi v20, v8, 5 ; RV64-i32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-i32-NEXT: vse32.v v8, (a0) -; RV64-i32-NEXT: flw fa5, 60(sp) -; RV64-i32-NEXT: vfmv.f.s fa4, v8 -; RV64-i32-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-i32-NEXT: vslidedown.vi v10, v8, 3 -; RV64-i32-NEXT: vslidedown.vi v11, v8, 2 +; RV64-i32-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-i32-NEXT: vslidedown.vi v8, v8, 4 ; RV64-i32-NEXT: fcvt.l.s a0, fa5 -; RV64-i32-NEXT: sw a0, 124(sp) -; RV64-i32-NEXT: flw fa5, 56(sp) -; RV64-i32-NEXT: fcvt.l.s a0, fa4 -; RV64-i32-NEXT: vfmv.f.s fa4, v10 -; RV64-i32-NEXT: vslidedown.vi v10, v8, 1 +; RV64-i32-NEXT: vfmv.f.s fa5, v12 ; RV64-i32-NEXT: fcvt.l.s a1, fa5 -; RV64-i32-NEXT: sw a1, 120(sp) -; RV64-i32-NEXT: flw fa5, 52(sp) -; RV64-i32-NEXT: fcvt.l.s a1, fa4 -; RV64-i32-NEXT: vfmv.f.s fa4, v11 -; RV64-i32-NEXT: fcvt.l.s a2, fa4 +; RV64-i32-NEXT: vfmv.f.s fa5, v13 +; RV64-i32-NEXT: fcvt.l.s a2, fa5 +; RV64-i32-NEXT: vfmv.f.s fa5, v14 ; RV64-i32-NEXT: fcvt.l.s a3, fa5 -; RV64-i32-NEXT: sw a3, 116(sp) -; RV64-i32-NEXT: flw fa5, 48(sp) -; RV64-i32-NEXT: vfmv.f.s fa4, v10 -; RV64-i32-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV64-i32-NEXT: vslidedown.vi v10, v8, 7 -; RV64-i32-NEXT: fcvt.l.s a3, fa4 +; RV64-i32-NEXT: vfmv.f.s fa5, v16 ; RV64-i32-NEXT: fcvt.l.s a4, fa5 -; RV64-i32-NEXT: sw a4, 112(sp) -; RV64-i32-NEXT: 
flw fa5, 44(sp) -; RV64-i32-NEXT: vfmv.f.s fa4, v10 -; RV64-i32-NEXT: vslidedown.vi v10, v8, 6 -; RV64-i32-NEXT: fcvt.l.s a4, fa4 +; RV64-i32-NEXT: vfmv.f.s fa5, v18 ; RV64-i32-NEXT: fcvt.l.s a5, fa5 -; RV64-i32-NEXT: sw a5, 108(sp) -; RV64-i32-NEXT: flw fa5, 40(sp) -; RV64-i32-NEXT: vfmv.f.s fa4, v10 -; RV64-i32-NEXT: vslidedown.vi v10, v8, 5 -; RV64-i32-NEXT: fcvt.l.s a5, fa4 +; RV64-i32-NEXT: vfmv.f.s fa5, v20 ; RV64-i32-NEXT: fcvt.l.s a6, fa5 -; RV64-i32-NEXT: sw a6, 104(sp) +; RV64-i32-NEXT: flw fa5, 60(sp) +; RV64-i32-NEXT: fcvt.l.s a7, fa5 +; RV64-i32-NEXT: sw a7, 124(sp) +; RV64-i32-NEXT: flw fa5, 56(sp) +; RV64-i32-NEXT: fcvt.l.s a7, fa5 +; RV64-i32-NEXT: sw a7, 120(sp) +; RV64-i32-NEXT: flw fa5, 52(sp) +; RV64-i32-NEXT: fcvt.l.s a7, fa5 +; RV64-i32-NEXT: sw a7, 116(sp) +; RV64-i32-NEXT: flw fa5, 48(sp) +; RV64-i32-NEXT: fcvt.l.s a7, fa5 +; RV64-i32-NEXT: sw a7, 112(sp) +; RV64-i32-NEXT: flw fa5, 44(sp) +; RV64-i32-NEXT: fcvt.l.s a7, fa5 +; RV64-i32-NEXT: sw a7, 108(sp) +; RV64-i32-NEXT: flw fa5, 40(sp) +; RV64-i32-NEXT: fcvt.l.s a7, fa5 +; RV64-i32-NEXT: sw a7, 104(sp) ; RV64-i32-NEXT: flw fa5, 36(sp) -; RV64-i32-NEXT: vfmv.f.s fa4, v10 -; RV64-i32-NEXT: fcvt.l.s a6, fa4 -; RV64-i32-NEXT: vslidedown.vi v8, v8, 4 ; RV64-i32-NEXT: fcvt.l.s a7, fa5 ; RV64-i32-NEXT: vfmv.f.s fa5, v8 ; RV64-i32-NEXT: sw a7, 100(sp) @@ -531,50 +533,51 @@ define <16 x iXLen> @lrint_v16f32(<16 x float> %x) { ; RV64-i64-NEXT: .cfi_def_cfa s0, 0 ; RV64-i64-NEXT: andi sp, sp, -128 ; RV64-i64-NEXT: addi a0, sp, 64 +; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma +; RV64-i64-NEXT: vfmv.f.s fa5, v8 +; RV64-i64-NEXT: vslidedown.vi v12, v8, 3 +; RV64-i64-NEXT: vslidedown.vi v13, v8, 2 +; RV64-i64-NEXT: vslidedown.vi v14, v8, 1 +; RV64-i64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-i64-NEXT: vslidedown.vi v16, v8, 7 +; RV64-i64-NEXT: vslidedown.vi v18, v8, 6 +; RV64-i64-NEXT: vslidedown.vi v20, v8, 5 ; RV64-i64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-i64-NEXT: vse32.v v8, (a0) -; RV64-i64-NEXT: flw fa5, 124(sp) -; RV64-i64-NEXT: vfmv.f.s fa4, v8 -; RV64-i64-NEXT: vsetivli zero, 1, e32, m1, ta, ma -; RV64-i64-NEXT: vslidedown.vi v10, v8, 3 -; RV64-i64-NEXT: vslidedown.vi v11, v8, 2 +; RV64-i64-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64-i64-NEXT: vslidedown.vi v8, v8, 4 ; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: sd a0, 248(sp) -; RV64-i64-NEXT: flw fa5, 120(sp) -; RV64-i64-NEXT: vslidedown.vi v12, v8, 1 -; RV64-i64-NEXT: fcvt.l.s a0, fa4 -; RV64-i64-NEXT: vfmv.f.s fa4, v10 +; RV64-i64-NEXT: vfmv.f.s fa5, v12 ; RV64-i64-NEXT: fcvt.l.s a1, fa5 -; RV64-i64-NEXT: sd a1, 240(sp) -; RV64-i64-NEXT: flw fa5, 116(sp) -; RV64-i64-NEXT: vsetivli zero, 1, e32, m2, ta, ma -; RV64-i64-NEXT: vslidedown.vi v14, v8, 7 -; RV64-i64-NEXT: fcvt.l.s a1, fa4 -; RV64-i64-NEXT: vfmv.f.s fa4, v11 +; RV64-i64-NEXT: vfmv.f.s fa5, v13 ; RV64-i64-NEXT: fcvt.l.s a2, fa5 -; RV64-i64-NEXT: sd a2, 232(sp) -; RV64-i64-NEXT: flw fa5, 112(sp) -; RV64-i64-NEXT: fcvt.l.s a2, fa4 -; RV64-i64-NEXT: vfmv.f.s fa4, v12 -; RV64-i64-NEXT: vslidedown.vi v10, v8, 6 +; RV64-i64-NEXT: vfmv.f.s fa5, v14 ; RV64-i64-NEXT: fcvt.l.s a3, fa5 -; RV64-i64-NEXT: sd a3, 224(sp) -; RV64-i64-NEXT: flw fa5, 108(sp) -; RV64-i64-NEXT: fcvt.l.s a3, fa4 -; RV64-i64-NEXT: vfmv.f.s fa4, v14 -; RV64-i64-NEXT: vslidedown.vi v12, v8, 5 +; RV64-i64-NEXT: vfmv.f.s fa5, v16 ; RV64-i64-NEXT: fcvt.l.s a4, fa5 -; RV64-i64-NEXT: sd a4, 216(sp) -; RV64-i64-NEXT: flw fa5, 104(sp) -; RV64-i64-NEXT: fcvt.l.s a4, fa4 -; RV64-i64-NEXT: vfmv.f.s fa4, v10 -; 
RV64-i64-NEXT: fcvt.l.s a5, fa4 +; RV64-i64-NEXT: vfmv.f.s fa5, v18 +; RV64-i64-NEXT: fcvt.l.s a5, fa5 +; RV64-i64-NEXT: vfmv.f.s fa5, v20 ; RV64-i64-NEXT: fcvt.l.s a6, fa5 -; RV64-i64-NEXT: sd a6, 208(sp) +; RV64-i64-NEXT: flw fa5, 124(sp) +; RV64-i64-NEXT: fcvt.l.s a7, fa5 +; RV64-i64-NEXT: sd a7, 248(sp) +; RV64-i64-NEXT: flw fa5, 120(sp) +; RV64-i64-NEXT: fcvt.l.s a7, fa5 +; RV64-i64-NEXT: sd a7, 240(sp) +; RV64-i64-NEXT: flw fa5, 116(sp) +; RV64-i64-NEXT: fcvt.l.s a7, fa5 +; RV64-i64-NEXT: sd a7, 232(sp) +; RV64-i64-NEXT: flw fa5, 112(sp) +; RV64-i64-NEXT: fcvt.l.s a7, fa5 +; RV64-i64-NEXT: sd a7, 224(sp) +; RV64-i64-NEXT: flw fa5, 108(sp) +; RV64-i64-NEXT: fcvt.l.s a7, fa5 +; RV64-i64-NEXT: sd a7, 216(sp) +; RV64-i64-NEXT: flw fa5, 104(sp) +; RV64-i64-NEXT: fcvt.l.s a7, fa5 +; RV64-i64-NEXT: sd a7, 208(sp) ; RV64-i64-NEXT: flw fa5, 100(sp) -; RV64-i64-NEXT: vfmv.f.s fa4, v12 -; RV64-i64-NEXT: fcvt.l.s a6, fa4 -; RV64-i64-NEXT: vslidedown.vi v8, v8, 4 ; RV64-i64-NEXT: fcvt.l.s a7, fa5 ; RV64-i64-NEXT: vfmv.f.s fa5, v8 ; RV64-i64-NEXT: sd a7, 200(sp) @@ -877,26 +880,27 @@ define <8 x iXLen> @lrint_v8f64(<8 x double> %x) { ; RV64-i64-NEXT: .cfi_def_cfa s0, 0 ; RV64-i64-NEXT: andi sp, sp, -64 ; RV64-i64-NEXT: mv a0, sp +; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-i64-NEXT: vfmv.f.s fa5, v8 +; RV64-i64-NEXT: vslidedown.vi v12, v8, 1 +; RV64-i64-NEXT: vsetivli zero, 1, e64, m2, ta, ma +; RV64-i64-NEXT: vslidedown.vi v14, v8, 3 ; RV64-i64-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-i64-NEXT: vse64.v v8, (a0) -; RV64-i64-NEXT: fld fa5, 56(sp) -; RV64-i64-NEXT: vfmv.f.s fa4, v8 -; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; RV64-i64-NEXT: vslidedown.vi v10, v8, 1 -; RV64-i64-NEXT: fcvt.l.d a0, fa4 -; RV64-i64-NEXT: fcvt.l.d a1, fa5 -; RV64-i64-NEXT: sd a1, 120(sp) -; RV64-i64-NEXT: fld fa5, 48(sp) -; RV64-i64-NEXT: vfmv.f.s fa4, v10 ; RV64-i64-NEXT: vsetivli zero, 1, e64, m2, ta, ma -; RV64-i64-NEXT: vslidedown.vi v10, v8, 3 -; RV64-i64-NEXT: fcvt.l.d a1, fa4 +; RV64-i64-NEXT: vslidedown.vi v8, v8, 2 +; RV64-i64-NEXT: fcvt.l.d a0, fa5 +; RV64-i64-NEXT: vfmv.f.s fa5, v12 +; RV64-i64-NEXT: fcvt.l.d a1, fa5 +; RV64-i64-NEXT: vfmv.f.s fa5, v14 ; RV64-i64-NEXT: fcvt.l.d a2, fa5 -; RV64-i64-NEXT: sd a2, 112(sp) +; RV64-i64-NEXT: fld fa5, 56(sp) +; RV64-i64-NEXT: fcvt.l.d a3, fa5 +; RV64-i64-NEXT: sd a3, 120(sp) +; RV64-i64-NEXT: fld fa5, 48(sp) +; RV64-i64-NEXT: fcvt.l.d a3, fa5 +; RV64-i64-NEXT: sd a3, 112(sp) ; RV64-i64-NEXT: fld fa5, 40(sp) -; RV64-i64-NEXT: vfmv.f.s fa4, v10 -; RV64-i64-NEXT: fcvt.l.d a2, fa4 -; RV64-i64-NEXT: vslidedown.vi v8, v8, 2 ; RV64-i64-NEXT: fcvt.l.d a3, fa5 ; RV64-i64-NEXT: vfmv.f.s fa5, v8 ; RV64-i64-NEXT: sd a3, 104(sp) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll index c29ccd45528b8..a258818539258 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-buildvec.ll @@ -141,8 +141,8 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) { ; CHECK-LABEL: buildvec_mask_nonconst_v4i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.i v0, 3 ; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: vmv.v.i v0, 3 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -151,8 +151,8 @@ define <4 x i1> @buildvec_mask_nonconst_v4i1(i1 %x, i1 %y) { ; ZVE32F-LABEL: buildvec_mask_nonconst_v4i1: ; ZVE32F: # %bb.0: ; 
ZVE32F-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; ZVE32F-NEXT: vmv.v.i v0, 3 ; ZVE32F-NEXT: vmv.v.x v8, a1 +; ZVE32F-NEXT: vmv.v.i v0, 3 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -245,10 +245,10 @@ define <8 x i1> @buildvec_mask_v8i1() { define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) { ; CHECK-LABEL: buildvec_mask_nonconst_v8i1: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 19 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmv.s.x v0, a2 ; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: li a1, 19 +; CHECK-NEXT: vmv.s.x v0, a1 ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -256,10 +256,10 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1(i1 %x, i1 %y) { ; ; ZVE32F-LABEL: buildvec_mask_nonconst_v8i1: ; ZVE32F: # %bb.0: -; ZVE32F-NEXT: li a2, 19 ; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; ZVE32F-NEXT: vmv.s.x v0, a2 ; ZVE32F-NEXT: vmv.v.x v8, a1 +; ZVE32F-NEXT: li a1, 19 +; ZVE32F-NEXT: vmv.s.x v0, a1 ; ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -282,12 +282,12 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vslide1down.vx v9, v8, a0 ; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v9, v9, a0 ; CHECK-NEXT: vslide1down.vx v8, v8, zero ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -299,12 +299,12 @@ define <8 x i1> @buildvec_mask_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 %w) { ; ZVE32F-NEXT: vmv.v.x v8, a0 ; ZVE32F-NEXT: vslide1down.vx v9, v8, a0 ; ZVE32F-NEXT: li a0, 1 -; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; ZVE32F-NEXT: vslide1down.vx v9, v9, a0 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -327,12 +327,12 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 % ; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vslide1down.vx v9, v8, a0 ; CHECK-NEXT: li a0, 1 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v8, v8, a3 ; CHECK-NEXT: vslide1down.vx v9, v9, a0 ; CHECK-NEXT: vslide1down.vx v8, v8, zero ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a2 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -344,12 +344,12 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1_2(i1 %x, i1 %y, i1 %z, i1 % ; ZVE32F-NEXT: vmv.v.x v8, a0 ; ZVE32F-NEXT: vslide1down.vx v9, v8, a0 ; ZVE32F-NEXT: li a0, 1 -; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a3 ; ZVE32F-NEXT: vslide1down.vx v9, v9, a0 ; ZVE32F-NEXT: vslide1down.vx v8, v8, zero ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -370,13 +370,13 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize { ; 
CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslide1down.vx v9, v8, a0 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 ; CHECK-NEXT: vslide1down.vx v9, v9, a1 ; CHECK-NEXT: vslide1down.vx v8, v8, a1 +; CHECK-NEXT: vmv.v.i v0, 15 ; CHECK-NEXT: vslidedown.vi v8, v9, 4, v0.t ; CHECK-NEXT: vand.vi v8, v8, 1 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -386,13 +386,13 @@ define <8 x i1> @buildvec_mask_optsize_nonconst_v8i1(i1 %x, i1 %y) optsize { ; ZVE32F: # %bb.0: ; ZVE32F-NEXT: vsetivli zero, 8, e8, mf2, ta, mu ; ZVE32F-NEXT: vmv.v.x v8, a0 -; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslide1down.vx v9, v8, a0 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 ; ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; ZVE32F-NEXT: vslide1down.vx v8, v8, a1 +; ZVE32F-NEXT: vmv.v.i v0, 15 ; ZVE32F-NEXT: vslidedown.vi v8, v9, 4, v0.t ; ZVE32F-NEXT: vand.vi v8, v8, 1 ; ZVE32F-NEXT: vmsne.vi v0, v8, 0 @@ -528,12 +528,12 @@ define <128 x i1> @buildvec_mask_v128i1() { ; RV64: # %bb.0: ; RV64-NEXT: lui a0, %hi(.LCPI20_0) ; RV64-NEXT: ld a0, %lo(.LCPI20_0)(a0) -; RV64-NEXT: lui a1, %hi(.LCPI20_1) -; RV64-NEXT: ld a1, %lo(.LCPI20_1)(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vmv.v.x v0, a0 +; RV64-NEXT: lui a0, %hi(.LCPI20_1) +; RV64-NEXT: ld a0, %lo(.LCPI20_1)(a0) ; RV64-NEXT: vsetvli zero, zero, e64, m1, tu, ma -; RV64-NEXT: vmv.s.x v0, a1 +; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: ret ; ; ZVE32F-LABEL: buildvec_mask_v128i1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll index 979785dd2c024..84486a96873d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-mask-splat.ll @@ -24,11 +24,11 @@ define void @splat_zeros_v2i1(ptr %x) { define void @splat_v1i1(ptr %x, i1 %y) { ; CHECK-LABEL: splat_v1i1: ; CHECK: # %bb.0: -; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: andi a1, a1, 1 +; CHECK-NEXT: vmv.s.x v9, a1 +; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 @@ -48,11 +48,11 @@ define void @splat_v1i1_icmp(ptr %x, i32 signext %y, i32 signext %z) { ; CHECK-LABEL: splat_v1i1_icmp: ; CHECK: # %bb.0: ; CHECK-NEXT: xor a1, a1, a2 -; CHECK-NEXT: seqz a1, a1 ; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.s.x v8, a1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: seqz a1, a1 +; CHECK-NEXT: vmv.s.x v9, a1 +; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 @@ -84,9 +84,9 @@ define void @splat_v4i1(ptr %x, i1 %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: andi a1, a1, 1 ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vmv.v.x v8, a1 -; CHECK-NEXT: vmsne.vi v0, v8, 0 ; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.x v9, a1 +; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v9, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 232a364e87f0e..29e7179b65acb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -406,7 +406,6 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV32ZVE32F: # %bb.0: ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, mu ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t -; RV32ZVE32F-NEXT: sw zero, 12(a0) ; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV32ZVE32F-NEXT: vmv.x.s a1, v9 ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 @@ -415,6 +414,7 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV32ZVE32F-NEXT: sw a1, 0(a0) ; RV32ZVE32F-NEXT: sw zero, 4(a0) ; RV32ZVE32F-NEXT: sw a2, 8(a0) +; RV32ZVE32F-NEXT: sw zero, 12(a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_v2i8_zextload_v2i64: @@ -732,9 +732,9 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: .LBB12_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB12_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -755,9 +755,9 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB12_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB12_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -1433,9 +1433,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: .LBB23_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -1458,9 +1458,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB23_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB23_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -1582,9 +1582,9 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB24_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -1607,9 +1607,9 @@ define <8 x i16> 
@mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB24_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB24_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -1732,9 +1732,9 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB25_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -1758,9 +1758,9 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB25_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB25_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -1885,9 +1885,9 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: .LBB26_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB26_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -1909,9 +1909,9 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB26_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -2149,15 +2149,15 @@ define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; ; RV32ZVE32F-LABEL: mgather_v2i32_zextload_v2i64: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: addi a1, a0, 8 ; RV32ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu ; RV32ZVE32F-NEXT: vluxei32.v v9, (zero), v8, v0.t ; RV32ZVE32F-NEXT: sw zero, 4(a0) ; RV32ZVE32F-NEXT: sw zero, 12(a0) +; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vse32.v v9, (a0) -; RV32ZVE32F-NEXT: vslidedown.vi v8, v9, 1 -; RV32ZVE32F-NEXT: vse32.v v8, (a1) +; RV32ZVE32F-NEXT: addi a0, a0, 8 +; RV32ZVE32F-NEXT: vse32.v v8, (a0) ; RV32ZVE32F-NEXT: ret ; ; RV64ZVE32F-LABEL: mgather_v2i32_zextload_v2i64: @@ -2480,9 +2480,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: 
andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -2505,9 +2505,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB35_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB35_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -2628,9 +2628,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB36_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -2653,9 +2653,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB36_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB36_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -2780,9 +2780,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB37_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -2806,9 +2806,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB37_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB37_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -2937,9 +2937,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB38_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -2962,9 +2962,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; 
RV64ZVE32F-NEXT: .LBB38_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB38_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -3087,9 +3087,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB39_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -3112,9 +3112,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB39_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB39_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -3240,9 +3240,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB40_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB40_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -3266,9 +3266,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB40_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB40_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -3391,9 +3391,9 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: .LBB41_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB41_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -3415,9 +3415,9 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB41_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -4109,9 +4109,9 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: 
vmv.x.s t0, v0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: beqz a3, .LBB48_7 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: beqz a1, .LBB48_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 0(a3) @@ -4272,9 +4272,9 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: .LBB48_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: beqz a6, .LBB48_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8 @@ -4320,8 +4320,8 @@ define <8 x i64> @mgather_baseidx_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: add t1, a1, t1 ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB48_14: # %else14 -; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: beqz t2, .LBB48_17 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 @@ -4386,9 +4386,9 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: beqz a3, .LBB49_7 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: beqz a1, .LBB49_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 0(a3) @@ -4549,9 +4549,9 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB49_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: beqz a6, .LBB49_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8 @@ -4597,8 +4597,8 @@ define <8 x i64> @mgather_baseidx_sext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: add t1, a1, t1 ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB49_14: # %else14 -; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: beqz t2, .LBB49_17 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 @@ -4665,9 +4665,9 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: beqz a3, .LBB50_7 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: beqz a1, .LBB50_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 0(a3) @@ -4830,9 +4830,9 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB50_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; 
RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: beqz a6, .LBB50_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8 @@ -4882,8 +4882,8 @@ define <8 x i64> @mgather_baseidx_zext_v8i8_v8i64(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: add t1, a1, t1 ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB50_14: # %else14 -; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: beqz t2, .LBB50_17 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 @@ -4950,10 +4950,10 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV32ZVE32F-NEXT: li a1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8 -; RV32ZVE32F-NEXT: beqz a3, .LBB51_7 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: beqz a1, .LBB51_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v10 @@ -5116,9 +5116,9 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB51_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: beqz a6, .LBB51_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8 @@ -5164,8 +5164,8 @@ define <8 x i64> @mgather_baseidx_v8i16_v8i64(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: add t1, a1, t1 ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB51_14: # %else14 -; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: beqz t2, .LBB51_17 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 @@ -5229,10 +5229,10 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: li a1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8 -; RV32ZVE32F-NEXT: beqz a3, .LBB52_7 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: beqz a1, .LBB52_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v10 @@ -5395,9 +5395,9 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB52_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: beqz a6, .LBB52_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8 @@ -5443,8 +5443,8 @@ define <8 x i64> @mgather_baseidx_sext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: 
add t1, a1, t1 ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB52_14: # %else14 -; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: beqz t2, .LBB52_17 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 @@ -5510,10 +5510,10 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV32ZVE32F-NEXT: li a1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32ZVE32F-NEXT: vwmaccu.vx v10, a1, v8 -; RV32ZVE32F-NEXT: beqz a3, .LBB53_7 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: beqz a1, .LBB53_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a3, v10 @@ -5678,9 +5678,9 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: .LBB53_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: beqz a6, .LBB53_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8 @@ -5730,8 +5730,8 @@ define <8 x i64> @mgather_baseidx_zext_v8i16_v8i64(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: add t1, a1, t1 ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB53_14: # %else14 -; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: beqz t2, .LBB53_17 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 @@ -5797,10 +5797,10 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: beqz a3, .LBB54_7 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: beqz a1, .LBB54_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 0(a3) @@ -5962,9 +5962,9 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB54_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: beqz a6, .LBB54_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8 @@ -6010,8 +6010,8 @@ define <8 x i64> @mgather_baseidx_v8i32_v8i64(ptr %base, <8 x i32> %idxs, <8 x i ; RV64ZVE32F-NEXT: add t1, a1, t1 ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB54_14: # %else14 -; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: beqz t2, .LBB54_17 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 @@ -6074,10 +6074,10 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; 
RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: beqz a3, .LBB55_7 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: beqz a1, .LBB55_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 0(a3) @@ -6239,9 +6239,9 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: .LBB55_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: beqz a6, .LBB55_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8 @@ -6287,8 +6287,8 @@ define <8 x i64> @mgather_baseidx_sext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: add t1, a1, t1 ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB55_14: # %else14 -; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: beqz t2, .LBB55_17 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 @@ -6352,10 +6352,10 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi a3, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: beqz a3, .LBB56_7 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: beqz a1, .LBB56_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a3, v8 ; RV32ZVE32F-NEXT: lw a1, 0(a3) @@ -6519,9 +6519,9 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: .LBB56_5: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a6, a5, 4 ; RV64ZVE32F-NEXT: beqz a6, .LBB56_10 ; RV64ZVE32F-NEXT: # %bb.6: # %cond.load4 ; RV64ZVE32F-NEXT: vmv.x.s a6, v8 @@ -6571,8 +6571,8 @@ define <8 x i64> @mgather_baseidx_zext_v8i32_v8i64(ptr %base, <8 x i32> %idxs, < ; RV64ZVE32F-NEXT: add t1, a1, t1 ; RV64ZVE32F-NEXT: ld t1, 0(t1) ; RV64ZVE32F-NEXT: .LBB56_14: # %else14 -; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi t2, a5, 64 ; RV64ZVE32F-NEXT: beqz t2, .LBB56_17 ; RV64ZVE32F-NEXT: # %bb.15: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s t2, v8 @@ -6654,9 +6654,9 @@ define <8 x i64> @mgather_baseidx_v8i64(ptr %base, <8 x i64> %idxs, <8 x i1> %m, ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a7 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: andi a2, t0, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: beqz a2, .LBB57_7 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: beqz a1, .LBB57_7 ; RV32ZVE32F-NEXT: # %bb.1: # %cond.load ; RV32ZVE32F-NEXT: vmv.x.s a2, v8 ; RV32ZVE32F-NEXT: lw a1, 0(a2) @@ -7073,14 +7073,14 @@ define <4 x bfloat> @mgather_truemask_v4bf16(<4 x ptr> %ptrs, <4 x bfloat> %pass 
; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: ld a3, 16(a0) ; RV64ZVE32F-NEXT: ld a0, 24(a0) -; RV64ZVE32F-NEXT: lh a1, 0(a1) ; RV64ZVE32F-NEXT: lh a2, 0(a2) +; RV64ZVE32F-NEXT: lh a1, 0(a1) ; RV64ZVE32F-NEXT: lh a3, 0(a3) -; RV64ZVE32F-NEXT: lh a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-NEXT: lh a0, 0(a0) ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-NEXT: ret %v = call <4 x bfloat> @llvm.masked.gather.v4bf16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x bfloat> %passthru) @@ -7271,9 +7271,9 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: .LBB64_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -7296,9 +7296,9 @@ define <8 x bfloat> @mgather_baseidx_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB64_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB64_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -7420,9 +7420,9 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB65_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -7445,9 +7445,9 @@ define <8 x bfloat> @mgather_baseidx_sext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB65_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB65_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -7570,9 +7570,9 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB66_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -7596,9 +7596,9 @@ define <8 x bfloat> @mgather_baseidx_zext_v8i8_v8bf16(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB66_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 
2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB66_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -7723,9 +7723,9 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: .LBB67_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB67_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -7747,9 +7747,9 @@ define <8 x bfloat> @mgather_baseidx_v8bf16(ptr %base, <8 x i16> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-NEXT: .LBB67_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -8085,14 +8085,14 @@ define <4 x half> @mgather_truemask_v4f16(<4 x ptr> %ptrs, <4 x half> %passthru) ; RV64ZVE32F-ZVFHMIN-NEXT: ld a2, 8(a0) ; RV64ZVE32F-ZVFHMIN-NEXT: ld a3, 16(a0) ; RV64ZVE32F-ZVFHMIN-NEXT: ld a0, 24(a0) -; RV64ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1) ; RV64ZVE32F-ZVFHMIN-NEXT: lh a2, 0(a2) +; RV64ZVE32F-ZVFHMIN-NEXT: lh a1, 0(a1) ; RV64ZVE32F-ZVFHMIN-NEXT: lh a3, 0(a3) -; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32F-ZVFHMIN-NEXT: lh a0, 0(a0) ; RV64ZVE32F-ZVFHMIN-NEXT: vslide1down.vx v8, v8, a0 ; RV64ZVE32F-ZVFHMIN-NEXT: ret %v = call <4 x half> @llvm.masked.gather.v4f16.v4p0(<4 x ptr> %ptrs, i32 2, <4 x i1> splat (i1 1), <4 x half> %passthru) @@ -8376,9 +8376,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_4: # %else2 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB74_14 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 @@ -8401,9 +8401,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-ZVFH-NEXT: .LBB74_9: # %else14 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB74_11 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 @@ -8500,9 +8500,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_4: # %else2 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4 -; 
RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB74_14 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 @@ -8525,9 +8525,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB74_9: # %else14 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB74_11 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 @@ -8649,9 +8649,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_4: # %else2 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB75_14 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 @@ -8674,9 +8674,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-ZVFH-NEXT: .LBB75_9: # %else14 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB75_11 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 @@ -8773,9 +8773,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_4: # %else2 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB75_14 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 @@ -8798,9 +8798,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB75_9: # %else14 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB75_11 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 @@ -8923,9 +8923,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_4: # %else2 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; 
RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB76_14 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 @@ -8949,9 +8949,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-ZVFH-NEXT: .LBB76_9: # %else14 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB76_11 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 @@ -9055,9 +9055,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_4: # %else2 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB76_14 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 @@ -9081,9 +9081,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB76_9: # %else14 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB76_11 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 @@ -9208,9 +9208,9 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFH-NEXT: .LBB77_4: # %else2 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB77_14 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 @@ -9232,9 +9232,9 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-ZVFH-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-ZVFH-NEXT: .LBB77_9: # %else14 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: beqz a2, .LBB77_11 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-ZVFH-NEXT: vmv.x.s a2, v8 @@ -9324,9 +9324,9 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_4: # %else2 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; 
RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB77_14 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 @@ -9348,9 +9348,9 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 6, e16, m1, tu, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslideup.vi v9, v8, 5 ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB77_9: # %else14 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: beqz a2, .LBB77_11 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-ZVFHMIN-NEXT: vmv.x.s a2, v8 @@ -9791,9 +9791,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: .LBB84_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -9816,9 +9816,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB84_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB84_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -9939,9 +9939,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: .LBB85_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB85_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -9964,9 +9964,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB85_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB85_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -10091,9 +10091,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: .LBB86_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB86_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -10117,9 +10117,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: vsetivli zero, 6, 
e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB86_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB86_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -10248,9 +10248,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: .LBB87_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB87_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -10273,9 +10273,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB87_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB87_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -10398,9 +10398,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB88_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB88_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -10423,9 +10423,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB88_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB88_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -10551,9 +10551,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB89_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB89_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -10577,9 +10577,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 5 ; RV64ZVE32F-NEXT: .LBB89_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB89_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -10702,9 +10702,9 @@ define <8 x float> 
@mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: .LBB90_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB90_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -10726,9 +10726,9 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: vsetivli zero, 6, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 5 ; RV64ZVE32F-NEXT: .LBB90_9: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v12, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: beqz a2, .LBB90_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -11308,9 +11308,9 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV32ZVE32F-NEXT: vmv.x.s a2, v0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB97_10 +; RV32ZVE32F-NEXT: andi a1, a2, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB97_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 ; RV32ZVE32F-NEXT: bnez a1, .LBB97_11 @@ -11420,9 +11420,9 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: .LBB97_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: bnez a3, .LBB97_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 @@ -11440,8 +11440,8 @@ define <8 x double> @mgather_baseidx_v8i8_v8f64(ptr %base, <8 x i8> %idxs, <8 x ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB97_9: # %else14 -; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: beqz a3, .LBB97_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -11523,9 +11523,9 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a2, v0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB98_10 +; RV32ZVE32F-NEXT: andi a1, a2, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB98_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 ; RV32ZVE32F-NEXT: bnez a1, .LBB98_11 @@ -11635,9 +11635,9 @@ define <8 x double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB98_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: bnez a3, .LBB98_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 @@ -11655,8 +11655,8 @@ define <8 x 
double> @mgather_baseidx_sext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB98_9: # %else14 -; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: beqz a3, .LBB98_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -11740,9 +11740,9 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV32ZVE32F-NEXT: vmv.x.s a2, v0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB99_10 +; RV32ZVE32F-NEXT: andi a1, a2, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB99_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 ; RV32ZVE32F-NEXT: bnez a1, .LBB99_11 @@ -11854,9 +11854,9 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB99_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: bnez a3, .LBB99_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 @@ -11875,8 +11875,8 @@ define <8 x double> @mgather_baseidx_zext_v8i8_v8f64(ptr %base, <8 x i8> %idxs, ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB99_9: # %else14 -; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: beqz a3, .LBB99_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -11963,10 +11963,10 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV32ZVE32F-NEXT: li a2, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a3, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32ZVE32F-NEXT: vwmaccus.vx v10, a2, v8 -; RV32ZVE32F-NEXT: bnez a3, .LBB100_10 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: bnez a2, .LBB100_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: bnez a2, .LBB100_11 @@ -12078,9 +12078,9 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB100_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: bnez a3, .LBB100_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 @@ -12098,8 +12098,8 @@ define <8 x double> @mgather_baseidx_v8i16_v8f64(ptr %base, <8 x i16> %idxs, <8 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB100_9: # %else14 -; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: beqz a3, .LBB100_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -12180,10 +12180,10 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: li 
a2, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a3, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32ZVE32F-NEXT: vwmaccus.vx v10, a2, v8 -; RV32ZVE32F-NEXT: bnez a3, .LBB101_10 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: bnez a2, .LBB101_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: bnez a2, .LBB101_11 @@ -12295,9 +12295,9 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: .LBB101_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: bnez a3, .LBB101_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 @@ -12315,8 +12315,8 @@ define <8 x double> @mgather_baseidx_sext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB101_9: # %else14 -; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: beqz a3, .LBB101_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -12399,10 +12399,10 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV32ZVE32F-NEXT: li a2, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a3, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32ZVE32F-NEXT: vwmaccu.vx v10, a2, v8 -; RV32ZVE32F-NEXT: bnez a3, .LBB102_10 +; RV32ZVE32F-NEXT: andi a2, a1, 1 +; RV32ZVE32F-NEXT: bnez a2, .LBB102_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a2, a1, 2 ; RV32ZVE32F-NEXT: bnez a2, .LBB102_11 @@ -12516,9 +12516,9 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: .LBB102_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: bnez a3, .LBB102_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 @@ -12537,8 +12537,8 @@ define <8 x double> @mgather_baseidx_zext_v8i16_v8f64(ptr %base, <8 x i16> %idxs ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB102_9: # %else14 -; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: beqz a3, .LBB102_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -12624,10 +12624,10 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a2, v0 -; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB103_10 +; RV32ZVE32F-NEXT: andi a1, a2, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB103_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 ; RV32ZVE32F-NEXT: bnez a1, .LBB103_11 @@ -12738,9 
+12738,9 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB103_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: bnez a3, .LBB103_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 @@ -12758,8 +12758,8 @@ define <8 x double> @mgather_baseidx_v8i32_v8f64(ptr %base, <8 x i32> %idxs, <8 ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB103_9: # %else14 -; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: beqz a3, .LBB103_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -12839,10 +12839,10 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a2, v0 -; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB104_10 +; RV32ZVE32F-NEXT: andi a1, a2, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB104_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 ; RV32ZVE32F-NEXT: bnez a1, .LBB104_11 @@ -12953,9 +12953,9 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: .LBB104_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: bnez a3, .LBB104_14 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 @@ -12973,8 +12973,8 @@ define <8 x double> @mgather_baseidx_sext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB104_9: # %else14 -; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: beqz a3, .LBB104_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -13055,10 +13055,10 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a2, v0 -; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB105_10 +; RV32ZVE32F-NEXT: andi a1, a2, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB105_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 ; RV32ZVE32F-NEXT: bnez a1, .LBB105_11 @@ -13171,9 +13171,9 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: .LBB105_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 4 ; RV64ZVE32F-NEXT: bnez a3, .LBB105_14 ; RV64ZVE32F-NEXT: # 
%bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a3, a2, 8 @@ -13192,8 +13192,8 @@ define <8 x double> @mgather_baseidx_zext_v8i32_v8f64(ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: add a3, a1, a3 ; RV64ZVE32F-NEXT: fld fa5, 0(a3) ; RV64ZVE32F-NEXT: .LBB105_9: # %else14 -; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a3, a2, 64 ; RV64ZVE32F-NEXT: beqz a3, .LBB105_11 ; RV64ZVE32F-NEXT: # %bb.10: # %cond.load16 ; RV64ZVE32F-NEXT: vmv.x.s a3, v8 @@ -13295,9 +13295,9 @@ define <8 x double> @mgather_baseidx_v8f64(ptr %base, <8 x i64> %idxs, <8 x i1> ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a6 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: andi a3, a2, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez a3, .LBB106_10 +; RV32ZVE32F-NEXT: andi a1, a2, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB106_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a2, 2 ; RV32ZVE32F-NEXT: bnez a1, .LBB106_11 @@ -13528,9 +13528,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: .LBB107_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_25 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -13546,9 +13546,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v11, a2 ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 4 ; RV64ZVE32F-NEXT: .LBB107_8: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8 +; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB107_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -13560,9 +13560,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v11, 5 ; RV64ZVE32F-NEXT: .LBB107_10: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_27 ; RV64ZVE32F-NEXT: # %bb.11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a1, 128 @@ -13585,9 +13585,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: .LBB107_15: # %else26 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 1024 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 1024 ; RV64ZVE32F-NEXT: bnez a2, .LBB107_30 ; RV64ZVE32F-NEXT: # %bb.16: # %else29 ; RV64ZVE32F-NEXT: slli a2, a1, 52 @@ -13608,9 +13608,9 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 13 ; RV64ZVE32F-NEXT: .LBB107_20: # %else38 -; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: bgez 
a2, .LBB107_22 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.load40 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -13741,15 +13741,14 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64V-NEXT: vsext.vf8 v16, v8 ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64V-NEXT: vslidedown.vi v12, v10, 16 -; RV64V-NEXT: vslidedown.vi v14, v8, 16 -; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64V-NEXT: vslidedown.vi v8, v0, 2 +; RV64V-NEXT: vslidedown.vi v8, v8, 16 ; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64V-NEXT: vluxei64.v v10, (a0), v16, v0.t ; RV64V-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV64V-NEXT: vsext.vf8 v16, v14 -; RV64V-NEXT: vmv1r.v v0, v8 -; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64V-NEXT: vsext.vf8 v16, v8 +; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64V-NEXT: vslidedown.vi v0, v0, 2 +; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; RV64V-NEXT: vluxei64.v v12, (a0), v16, v0.t ; RV64V-NEXT: li a0, 32 ; RV64V-NEXT: vsetvli zero, a0, e8, m2, ta, ma @@ -13784,9 +13783,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB108_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_49 ; RV64ZVE32F-NEXT: # %bb.5: # %else5 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -13802,9 +13801,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 4 ; RV64ZVE32F-NEXT: .LBB108_8: # %else11 -; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 8 +; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.load13 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -13816,9 +13815,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 6, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v14, 5 ; RV64ZVE32F-NEXT: .LBB108_10: # %else14 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB108_51 ; RV64ZVE32F-NEXT: # %bb.11: # %else17 ; RV64ZVE32F-NEXT: andi a2, a1, 128 @@ -13841,9 +13840,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB108_15: # %else26 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 1024 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 1024 ; RV64ZVE32F-NEXT: beqz a2, .LBB108_17 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.load28 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 @@ -13865,9 +13864,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 12, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 11 ; RV64ZVE32F-NEXT: .LBB108_19: # %else32 -; RV64ZVE32F-NEXT: slli a2, a1, 51 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 16 +; RV64ZVE32F-NEXT: slli a2, a1, 51 ; 
RV64ZVE32F-NEXT: bgez a2, .LBB108_21 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.load34 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 @@ -13889,9 +13888,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 14, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v9, 13 ; RV64ZVE32F-NEXT: .LBB108_23: # %else38 -; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v13, 2 +; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_54 ; RV64ZVE32F-NEXT: # %bb.24: # %else41 ; RV64ZVE32F-NEXT: slli a2, a1, 48 @@ -13914,9 +13913,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB108_28: # %else50 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: slli a2, a1, 45 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 2 +; RV64ZVE32F-NEXT: slli a2, a1, 45 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_57 ; RV64ZVE32F-NEXT: # %bb.29: # %else53 ; RV64ZVE32F-NEXT: slli a2, a1, 44 @@ -13932,9 +13931,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vmv.s.x v12, a2 ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 20 ; RV64ZVE32F-NEXT: .LBB108_32: # %else59 -; RV64ZVE32F-NEXT: slli a2, a1, 42 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 8 +; RV64ZVE32F-NEXT: slli a2, a1, 42 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_34 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.load61 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -13946,9 +13945,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 22, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 21 ; RV64ZVE32F-NEXT: .LBB108_34: # %else62 -; RV64ZVE32F-NEXT: slli a2, a1, 41 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-NEXT: slli a2, a1, 41 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_59 ; RV64ZVE32F-NEXT: # %bb.35: # %else65 ; RV64ZVE32F-NEXT: slli a2, a1, 40 @@ -13971,9 +13970,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: .LBB108_39: # %else74 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: slli a2, a1, 37 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: slli a2, a1, 37 ; RV64ZVE32F-NEXT: bltz a2, .LBB108_62 ; RV64ZVE32F-NEXT: # %bb.40: # %else77 ; RV64ZVE32F-NEXT: slli a2, a1, 36 @@ -13994,9 +13993,9 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: vsetivli zero, 30, e8, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v12, 29 ; RV64ZVE32F-NEXT: .LBB108_44: # %else86 -; RV64ZVE32F-NEXT: slli a2, a1, 33 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: slli a2, a1, 33 ; RV64ZVE32F-NEXT: bgez a2, .LBB108_46 ; RV64ZVE32F-NEXT: # %bb.45: # %cond.load88 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 @@ -14279,8 +14278,8 @@ define <4 x i32> @mgather_narrow_edge_case(ptr %base) { ; RV32: # %bb.0: ; RV32-NEXT: li a1, -512 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vmv.v.x v8, a1 +; RV32-NEXT: vmv.v.i v0, 5 ; RV32-NEXT: vmerge.vim v8, v8, 0, 
v0 ; RV32-NEXT: vluxei32.v v8, (a0), v8 ; RV32-NEXT: ret @@ -14288,10 +14287,11 @@ define <4 x i32> @mgather_narrow_edge_case(ptr %base) { ; RV64V-LABEL: mgather_narrow_edge_case: ; RV64V: # %bb.0: ; RV64V-NEXT: li a1, -512 +; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64V-NEXT: vmv.v.x v8, a1 ; RV64V-NEXT: vsetivli zero, 1, e8, mf8, ta, ma ; RV64V-NEXT: vmv.v.i v0, 5 ; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64V-NEXT: vmv.v.x v8, a1 ; RV64V-NEXT: vmerge.vim v10, v8, 0, v0 ; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64V-NEXT: vluxei64.v v8, (a0), v10 @@ -14302,8 +14302,8 @@ define <4 x i32> @mgather_narrow_edge_case(ptr %base) { ; RV64ZVE32F-NEXT: lw a1, -512(a0) ; RV64ZVE32F-NEXT: lw a0, 0(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.v.i v0, 5 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 +; RV64ZVE32F-NEXT: vmv.v.i v0, 5 ; RV64ZVE32F-NEXT: vmerge.vxm v8, v8, a0, v0 ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i32, ptr %base, <4 x i8> @@ -14337,36 +14337,36 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV32-NEXT: lbu a0, 1(a0) ; RV32-NEXT: vmv.x.s a7, v10 ; RV32-NEXT: vmv.x.s t0, v8 -; RV32-NEXT: lbu t1, 0(a1) -; RV32-NEXT: lbu a1, 1(a1) -; RV32-NEXT: lbu t2, 0(a2) -; RV32-NEXT: lbu a2, 1(a2) ; RV32-NEXT: slli a0, a0, 8 ; RV32-NEXT: or a0, a0, a6 -; RV32-NEXT: lbu a6, 0(a3) -; RV32-NEXT: lbu a3, 1(a3) +; RV32-NEXT: lbu a6, 0(a1) +; RV32-NEXT: lbu a1, 1(a1) ; RV32-NEXT: slli a1, a1, 8 -; RV32-NEXT: or a1, a1, t1 -; RV32-NEXT: lbu t1, 0(a4) -; RV32-NEXT: lbu a4, 1(a4) +; RV32-NEXT: or a1, a1, a6 +; RV32-NEXT: lbu a6, 0(a2) +; RV32-NEXT: lbu a2, 1(a2) ; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a2, a2, t2 -; RV32-NEXT: lbu t2, 0(a5) -; RV32-NEXT: lbu a5, 1(a5) +; RV32-NEXT: or a2, a2, a6 +; RV32-NEXT: lbu a6, 0(a3) +; RV32-NEXT: lbu a3, 1(a3) ; RV32-NEXT: slli a3, a3, 8 ; RV32-NEXT: or a3, a3, a6 -; RV32-NEXT: lbu a6, 0(a7) -; RV32-NEXT: lbu a7, 1(a7) +; RV32-NEXT: lbu a6, 0(a4) +; RV32-NEXT: lbu a4, 1(a4) ; RV32-NEXT: slli a4, a4, 8 -; RV32-NEXT: or a4, a4, t1 -; RV32-NEXT: lbu t1, 0(t0) -; RV32-NEXT: lbu t0, 1(t0) +; RV32-NEXT: or a4, a4, a6 +; RV32-NEXT: lbu a6, 0(a5) +; RV32-NEXT: lbu a5, 1(a5) ; RV32-NEXT: slli a5, a5, 8 -; RV32-NEXT: or a5, a5, t2 +; RV32-NEXT: or a5, a5, a6 +; RV32-NEXT: lbu a6, 0(a7) +; RV32-NEXT: lbu a7, 1(a7) ; RV32-NEXT: slli a7, a7, 8 ; RV32-NEXT: or a6, a7, a6 +; RV32-NEXT: lbu a7, 0(t0) +; RV32-NEXT: lbu t0, 1(t0) ; RV32-NEXT: slli t0, t0, 8 -; RV32-NEXT: or a7, t0, t1 +; RV32-NEXT: or a7, t0, a7 ; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV32-NEXT: vmv.v.x v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 @@ -14375,8 +14375,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV32-NEXT: vslide1down.vx v9, v9, a5 ; RV32-NEXT: vslide1down.vx v10, v8, a3 ; RV32-NEXT: vslide1down.vx v8, v9, a6 -; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslide1down.vx v8, v8, a7 +; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV32-NEXT: ret ; @@ -14450,8 +14450,8 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64V-NEXT: vmv.v.x v8, a3 ; RV64V-NEXT: vslide1down.vx v8, v8, a0 ; RV64V-NEXT: vslide1down.vx v8, v8, a1 -; RV64V-NEXT: vmv.v.i v0, 15 ; RV64V-NEXT: vslide1down.vx v8, v8, a2 +; RV64V-NEXT: vmv.v.i v0, 15 ; RV64V-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64V-NEXT: addi sp, s0, -128 ; RV64V-NEXT: .cfi_def_cfa sp, 128 @@ -14475,38 +14475,38 @@ define <8 x i16> @mgather_strided_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: lbu t0, 13(a0) ; 
RV64ZVE32F-NEXT: slli a2, a2, 8 ; RV64ZVE32F-NEXT: slli a4, a4, 8 +; RV64ZVE32F-NEXT: slli a6, a6, 8 ; RV64ZVE32F-NEXT: or a1, a2, a1 ; RV64ZVE32F-NEXT: or a3, a4, a3 -; RV64ZVE32F-NEXT: lbu a2, 16(a0) -; RV64ZVE32F-NEXT: lbu a4, 17(a0) -; RV64ZVE32F-NEXT: lbu t1, 20(a0) -; RV64ZVE32F-NEXT: lbu t2, 21(a0) -; RV64ZVE32F-NEXT: slli a6, a6, 8 -; RV64ZVE32F-NEXT: or a5, a6, a5 +; RV64ZVE32F-NEXT: or a2, a6, a5 +; RV64ZVE32F-NEXT: lbu a4, 16(a0) +; RV64ZVE32F-NEXT: lbu a5, 17(a0) +; RV64ZVE32F-NEXT: lbu a6, 20(a0) +; RV64ZVE32F-NEXT: lbu t1, 21(a0) ; RV64ZVE32F-NEXT: slli t0, t0, 8 -; RV64ZVE32F-NEXT: slli a4, a4, 8 -; RV64ZVE32F-NEXT: slli t2, t2, 8 -; RV64ZVE32F-NEXT: or a6, t0, a7 -; RV64ZVE32F-NEXT: or a2, a4, a2 -; RV64ZVE32F-NEXT: lbu a4, 24(a0) -; RV64ZVE32F-NEXT: lbu a7, 25(a0) -; RV64ZVE32F-NEXT: or t0, t2, t1 +; RV64ZVE32F-NEXT: slli a5, a5, 8 +; RV64ZVE32F-NEXT: slli t1, t1, 8 +; RV64ZVE32F-NEXT: or a7, t0, a7 +; RV64ZVE32F-NEXT: or a4, a5, a4 +; RV64ZVE32F-NEXT: or a5, t1, a6 +; RV64ZVE32F-NEXT: lbu a6, 24(a0) +; RV64ZVE32F-NEXT: lbu t0, 25(a0) ; RV64ZVE32F-NEXT: lbu t1, 28(a0) ; RV64ZVE32F-NEXT: lbu a0, 29(a0) -; RV64ZVE32F-NEXT: slli a7, a7, 8 -; RV64ZVE32F-NEXT: or a4, a7, a4 -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 +; RV64ZVE32F-NEXT: slli t0, t0, 8 +; RV64ZVE32F-NEXT: or a6, t0, a6 ; RV64ZVE32F-NEXT: slli a0, a0, 8 ; RV64ZVE32F-NEXT: or a0, a0, t1 +; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 -; RV64ZVE32F-NEXT: vmv.v.x v9, a2 +; RV64ZVE32F-NEXT: vmv.v.x v9, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, t0 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a4 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a6 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14541,7 +14541,6 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 24(a0) ; RV64ZVE32F-NEXT: lh a0, 26(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 @@ -14550,6 +14549,7 @@ define <8 x i16> @mgather_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14586,7 +14586,6 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 28(a0) ; RV64ZVE32F-NEXT: lh a0, 30(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 @@ -14595,6 +14594,7 @@ define <8 x i16> @mgather_strided_2xSEW_with_offset(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; 
RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -14631,7 +14631,6 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 20(a0) ; RV64ZVE32F-NEXT: lh a0, 22(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a3 ; RV64ZVE32F-NEXT: vmv.v.x v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 @@ -14640,6 +14639,7 @@ define <8 x i16> @mgather_reverse_unit_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -14676,7 +14676,6 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 12(a0) ; RV64ZVE32F-NEXT: lh a0, 14(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a3 ; RV64ZVE32F-NEXT: vmv.v.x v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a4 @@ -14685,6 +14684,7 @@ define <8 x i16> @mgather_reverse_strided_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -14720,7 +14720,6 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 4(a0) ; RV64ZVE32F-NEXT: lh a0, 6(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a5 ; RV64ZVE32F-NEXT: vmv.v.x v9, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 @@ -14729,6 +14728,7 @@ define <8 x i16> @mgather_gather_2xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14767,7 +14767,6 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 4(a0) ; RV64ZVE32F-NEXT: lh a0, 6(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a5 ; RV64ZVE32F-NEXT: vmv.v.x v9, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 @@ -14776,6 +14775,7 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14806,23 +14806,23 @@ define <8 x i16> @mgather_gather_2xSEW_unaligned2(ptr %base) { ; ; RV64ZVE32F-LABEL: mgather_gather_2xSEW_unaligned2: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: lh a1, 2(a0) -; RV64ZVE32F-NEXT: lh a2, 4(a0) -; RV64ZVE32F-NEXT: lh a3, 6(a0) -; RV64ZVE32F-NEXT: lh a4, 8(a0) -; RV64ZVE32F-NEXT: lh a5, 10(a0) -; RV64ZVE32F-NEXT: lh a6, 18(a0) -; RV64ZVE32F-NEXT: lh a0, 20(a0) +; RV64ZVE32F-NEXT: lh a1, 10(a0) +; RV64ZVE32F-NEXT: lh a2, 18(a0) +; 
RV64ZVE32F-NEXT: lh a3, 20(a0) +; RV64ZVE32F-NEXT: lh a4, 2(a0) +; RV64ZVE32F-NEXT: lh a5, 4(a0) +; RV64ZVE32F-NEXT: lh a6, 6(a0) +; RV64ZVE32F-NEXT: lh a0, 8(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 -; RV64ZVE32F-NEXT: vmv.v.x v8, a1 -; RV64ZVE32F-NEXT: vmv.v.x v9, a4 +; RV64ZVE32F-NEXT: vmv.v.x v8, a4 +; RV64ZVE32F-NEXT: vmv.v.x v9, a0 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a5 +; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a1 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a5 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a6 -; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a2 -; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a0 -; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a3 +; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a6 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14865,7 +14865,6 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 20(a0) ; RV64ZVE32F-NEXT: lh a0, 22(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 @@ -14874,6 +14873,7 @@ define <8 x i16> @mgather_gather_4xSEW(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14913,7 +14913,6 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 20(a0) ; RV64ZVE32F-NEXT: lh a0, 22(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 @@ -14922,6 +14921,7 @@ define <8 x i16> @mgather_gather_4xSEW_partial_align(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i32> @@ -14970,7 +14970,6 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 4(a0) ; RV64ZVE32F-NEXT: lh a0, 6(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a2 @@ -14979,6 +14978,7 @@ define <8 x i16> @mgather_shuffle_rotate(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a4 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> @@ -15018,7 +15018,6 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; RV64ZVE32F-NEXT: lh a7, 12(a0) ; RV64ZVE32F-NEXT: lh a0, 14(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m1, ta, mu -; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vmv.v.x v8, a1 ; RV64ZVE32F-NEXT: vmv.v.x v9, a5 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a3 
@@ -15027,6 +15026,7 @@ define <8 x i16> @mgather_shuffle_vrgather(ptr %base) { ; RV64ZVE32F-NEXT: vslide1down.vx v9, v9, a7 ; RV64ZVE32F-NEXT: vslide1down.vx v10, v8, a2 ; RV64ZVE32F-NEXT: vslide1down.vx v8, v9, a0 +; RV64ZVE32F-NEXT: vmv.v.i v0, 15 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 4, v0.t ; RV64ZVE32F-NEXT: ret %ptrs = getelementptr inbounds i16, ptr %base, <8 x i64> diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll index f72b08a405246..f27c8e5d664e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-fp.ll @@ -242,9 +242,9 @@ define <32 x double> @masked_load_v32f64(ptr %a, <32 x i1> %mask) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -278,12 +278,12 @@ define <64 x float> @masked_load_v64f32(ptr %a, <64 x i1> %mask) { ; CHECK-LABEL: masked_load_v64f32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v0, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0), v0.t ; CHECK-NEXT: ret %load = call <64 x float> @llvm.masked.load.v64f32(ptr %a, i32 8, <64 x i1> %mask, <64 x float> undef) @@ -294,12 +294,12 @@ define <128 x bfloat> @masked_load_v128bf16(ptr %a, <128 x i1> %mask) { ; CHECK-LABEL: masked_load_v128bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v0, 8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0), v0.t ; CHECK-NEXT: ret %load = call <128 x bfloat> @llvm.masked.load.v128bf16(ptr %a, i32 8, <128 x i1> %mask, <128 x bfloat> undef) @@ -310,12 +310,12 @@ define <128 x half> @masked_load_v128f16(ptr %a, <128 x i1> %mask) { ; CHECK-LABEL: masked_load_v128f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v0, 8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0), v0.t ; CHECK-NEXT: ret %load = call <128 x half> @llvm.masked.load.v128f16(ptr %a, i32 8, <128 x i1> %mask, <128 x half> undef) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll index 69903d77084bf..6e613917f8cd9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-load-int.ll @@ -240,9 +240,9 @@ define <32 x i64> @masked_load_v32i64(ptr %a, <32 x i1> %mask) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -276,12 +276,12 @@ define <64 x i32> @masked_load_v64i32(ptr %a, <64 x i1> %mask) { ; CHECK-LABEL: masked_load_v64i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v0, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a0), v0.t ; CHECK-NEXT: ret %load = call <64 x i32> @llvm.masked.load.v64i32(ptr %a, i32 8, <64 x i1> %mask, <64 x i32> undef) @@ -303,12 +303,12 @@ define <128 x i16> @masked_load_v128i16(ptr %a, <128 x i1> %mask) { ; CHECK-LABEL: masked_load_v128i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v16, v0, 8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v16 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vle16.v v16, (a0), v0.t ; CHECK-NEXT: ret %load = call <128 x i16> @llvm.masked.load.v128i16(ptr %a, i32 8, <128 x i1> %mask, <128 x i16> undef) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index 7354f9afa9a71..7358fd4cfa0f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -123,9 +123,9 @@ define void @mscatter_v2i16_truncstore_v2i8(<2 x i16> %val, <2 x ptr> %ptrs, <2 ; RV64ZVE32F-LABEL: mscatter_v2i16_truncstore_v2i8: ; RV64ZVE32F: # %bb.0: ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0 ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 ; RV64ZVE32F-NEXT: andi a3, a2, 1 -; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0 ; RV64ZVE32F-NEXT: bnez a3, .LBB2_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 @@ -181,8 +181,8 @@ define void @mscatter_v2i32_truncstore_v2i8(<2 x i32> %val, <2 x ptr> %ptrs, <2 ; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v0 -; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0 +; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: bnez a3, .LBB3_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 @@ -229,11 +229,11 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2 ; ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i8: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: lw a1, 8(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32ZVE32F-NEXT: vmv.s.x v9, a1 -; RV32ZVE32F-NEXT: vmv.s.x 
v10, a0 +; RV32ZVE32F-NEXT: vmv.s.x v9, a0 +; RV32ZVE32F-NEXT: vmv.s.x v10, a1 ; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t ; RV32ZVE32F-NEXT: ret @@ -244,8 +244,8 @@ define void @mscatter_v2i64_truncstore_v2i8(<2 x i64> %val, <2 x ptr> %ptrs, <2 ; RV64ZVE32F-NEXT: vmv.s.x v9, a1 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 -; RV64ZVE32F-NEXT: andi a1, a0, 1 ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 +; RV64ZVE32F-NEXT: andi a1, a0, 1 ; RV64ZVE32F-NEXT: bnez a1, .LBB4_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a0, a0, 2 @@ -513,9 +513,9 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: .LBB9_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -535,9 +535,9 @@ define void @mscatter_baseidx_v8i8(<8 x i8> %val, ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB9_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB9_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -698,11 +698,11 @@ define void @mscatter_v2i32_truncstore_v2i16(<2 x i32> %val, <2 x ptr> %ptrs, <2 ; ; RV64ZVE32F-LABEL: mscatter_v2i32_truncstore_v2i16: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a2, v0 -; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vnsrl.wi v8, v8, 0 +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vmv.x.s a2, v0 +; RV64ZVE32F-NEXT: andi a3, a2, 1 ; RV64ZVE32F-NEXT: bnez a3, .LBB12_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 @@ -745,11 +745,11 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 ; ; RV32ZVE32F-LABEL: mscatter_v2i64_truncstore_v2i16: ; RV32ZVE32F: # %bb.0: -; RV32ZVE32F-NEXT: lw a1, 8(a0) -; RV32ZVE32F-NEXT: lw a0, 0(a0) +; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw a0, 8(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.s.x v9, a1 -; RV32ZVE32F-NEXT: vmv.s.x v10, a0 +; RV32ZVE32F-NEXT: vmv.s.x v9, a0 +; RV32ZVE32F-NEXT: vmv.s.x v10, a1 ; RV32ZVE32F-NEXT: vslideup.vi v10, v9, 1 ; RV32ZVE32F-NEXT: vsoxei32.v v10, (zero), v8, v0.t ; RV32ZVE32F-NEXT: ret @@ -761,9 +761,9 @@ define void @mscatter_v2i64_truncstore_v2i16(<2 x i64> %val, <2 x ptr> %ptrs, <2 ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 -; RV64ZVE32F-NEXT: andi a1, a0, 1 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 +; RV64ZVE32F-NEXT: andi a1, a0, 1 ; RV64ZVE32F-NEXT: bnez a1, .LBB13_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a0, a0, 2 @@ -1035,9 +1035,9 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB18_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, 
mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -1058,9 +1058,9 @@ define void @mscatter_baseidx_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB18_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB18_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -1168,9 +1168,9 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB19_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -1191,9 +1191,9 @@ define void @mscatter_baseidx_sext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB19_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB19_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -1302,9 +1302,9 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB20_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -1326,9 +1326,9 @@ define void @mscatter_baseidx_zext_v8i8_v8i16(<8 x i16> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB20_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB20_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -1440,9 +1440,9 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: .LBB21_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -1463,9 +1463,9 @@ define void @mscatter_baseidx_v8i16(<8 x i16> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-NEXT: vse16.v 
v9, (a2) ; RV64ZVE32F-NEXT: .LBB21_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB21_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -1638,10 +1638,10 @@ define void @mscatter_v2i64_truncstore_v2i32(<2 x i64> %val, <2 x ptr> %ptrs, <2 ; RV64ZVE32F-NEXT: vmv.v.x v8, a0 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vmv.x.s a0, v0 -; RV64ZVE32F-NEXT: andi a4, a0, 1 ; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslide1down.vx v8, v8, a1 -; RV64ZVE32F-NEXT: bnez a4, .LBB24_3 +; RV64ZVE32F-NEXT: andi a1, a0, 1 +; RV64ZVE32F-NEXT: bnez a1, .LBB24_3 ; RV64ZVE32F-NEXT: # %bb.1: # %else ; RV64ZVE32F-NEXT: andi a0, a0, 2 ; RV64ZVE32F-NEXT: bnez a0, .LBB24_4 @@ -1915,9 +1915,9 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB29_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -1939,9 +1939,9 @@ define void @mscatter_baseidx_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB29_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB29_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -2052,9 +2052,9 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -2076,9 +2076,9 @@ define void @mscatter_baseidx_sext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB30_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB30_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -2193,9 +2193,9 @@ define void @mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB31_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -2218,9 +2218,9 @@ define void 
@mscatter_baseidx_zext_v8i8_v8i32(<8 x i32> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB31_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB31_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -2339,9 +2339,9 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: .LBB32_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -2363,9 +2363,9 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB32_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB32_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -2478,9 +2478,9 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB33_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -2502,9 +2502,9 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB33_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB33_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -2620,9 +2620,9 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB34_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -2645,9 +2645,9 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB34_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB34_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; 
RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -2761,9 +2761,9 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV64ZVE32F-NEXT: .LBB35_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -2785,9 +2785,9 @@ define void @mscatter_baseidx_v8i32(<8 x i32> %val, ptr %base, <8 x i32> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB35_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -3425,13 +3425,13 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV32ZVE32F-NEXT: lw t6, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi s2, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez s2, .LBB42_10 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB42_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB42_11 @@ -3560,9 +3560,9 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: .LBB42_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a0, a5, 8 @@ -3580,8 +3580,8 @@ define void @mscatter_baseidx_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8> %id ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB42_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB42_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a0, a5, -128 @@ -3675,13 +3675,13 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: lw t6, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsext.vf4 v10, v8 -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi s2, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez s2, .LBB43_10 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB43_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB43_11 @@ -3810,9 +3810,9 @@ define void 
@mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB43_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a0, a5, 8 @@ -3830,8 +3830,8 @@ define void @mscatter_baseidx_sext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB43_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB43_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a0, a5, -128 @@ -3927,13 +3927,13 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV32ZVE32F-NEXT: lw t6, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vzext.vf4 v10, v8 -; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi s2, t0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez s2, .LBB44_10 +; RV32ZVE32F-NEXT: andi a1, t0, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB44_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, t0, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB44_11 @@ -4032,7 +4032,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; ; RV64ZVE32F-LABEL: mscatter_baseidx_zext_v8i8_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a5, 40(a0) +; RV64ZVE32F-NEXT: ld a4, 40(a0) ; RV64ZVE32F-NEXT: ld a3, 48(a0) ; RV64ZVE32F-NEXT: ld a2, 56(a0) ; RV64ZVE32F-NEXT: ld t1, 8(a0) @@ -4040,8 +4040,8 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: ld a7, 24(a0) ; RV64ZVE32F-NEXT: ld a6, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a4, v0 -; RV64ZVE32F-NEXT: andi t2, a4, 1 +; RV64ZVE32F-NEXT: vmv.x.s a5, v0 +; RV64ZVE32F-NEXT: andi t2, a5, 1 ; RV64ZVE32F-NEXT: beqz t2, .LBB44_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) @@ -4051,7 +4051,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: sd a0, 0(t2) ; RV64ZVE32F-NEXT: .LBB44_2: # %else -; RV64ZVE32F-NEXT: andi a0, a4, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -4064,18 +4064,18 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: .LBB44_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_13 ; RV64ZVE32F-NEXT: .LBB44_6: # %else6 -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; 
RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_14 ; RV64ZVE32F-NEXT: .LBB44_7: # %else8 -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_9 ; RV64ZVE32F-NEXT: .LBB44_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 @@ -4083,13 +4083,13 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: andi a0, a0, 255 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 -; RV64ZVE32F-NEXT: sd a5, 0(a0) +; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB44_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_16 ; RV64ZVE32F-NEXT: .LBB44_11: # %else14 ; RV64ZVE32F-NEXT: ret @@ -4099,7 +4099,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd t0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_6 ; RV64ZVE32F-NEXT: .LBB44_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4108,7 +4108,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a7, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_7 ; RV64ZVE32F-NEXT: .LBB44_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 @@ -4116,7 +4116,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a6, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB44_8 ; RV64ZVE32F-NEXT: j .LBB44_9 ; RV64ZVE32F-NEXT: .LBB44_15: # %cond.store11 @@ -4125,7 +4125,7 @@ define void @mscatter_baseidx_zext_v8i8_v8i64(<8 x i64> %val, ptr %base, <8 x i8 ; RV64ZVE32F-NEXT: slli a0, a0, 3 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a3, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB44_11 ; RV64ZVE32F-NEXT: .LBB44_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4186,13 +4186,13 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV32ZVE32F-NEXT: lw t5, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.v.x v10, a1 -; RV32ZVE32F-NEXT: li s1, 8 +; RV32ZVE32F-NEXT: li a1, 8 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s2, a1, 1 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32ZVE32F-NEXT: vwmaccus.vx v10, s1, v8 -; RV32ZVE32F-NEXT: bnez s2, .LBB45_10 +; RV32ZVE32F-NEXT: andi s1, a1, 1 +; RV32ZVE32F-NEXT: bnez s1, .LBB45_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB45_11 @@ -4323,9 +4323,9 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: .LBB45_4: # %else2 ; 
RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a0, a5, 8 @@ -4343,8 +4343,8 @@ define void @mscatter_baseidx_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB45_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB45_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a0, a5, -128 @@ -4438,13 +4438,13 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw t5, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.v.x v10, a1 -; RV32ZVE32F-NEXT: li s1, 8 +; RV32ZVE32F-NEXT: li a1, 8 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s2, a1, 1 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32ZVE32F-NEXT: vwmaccus.vx v10, s1, v8 -; RV32ZVE32F-NEXT: bnez s2, .LBB46_10 +; RV32ZVE32F-NEXT: andi s1, a1, 1 +; RV32ZVE32F-NEXT: bnez s1, .LBB46_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB46_11 @@ -4575,9 +4575,9 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB46_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a0, a5, 8 @@ -4595,8 +4595,8 @@ define void @mscatter_baseidx_sext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB46_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB46_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a0, a5, -128 @@ -4692,13 +4692,13 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw t5, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.v.x v10, a1 -; RV32ZVE32F-NEXT: li s1, 8 +; RV32ZVE32F-NEXT: li a1, 8 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma +; RV32ZVE32F-NEXT: vwmaccu.vx v10, a1, v8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi s2, a1, 1 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; RV32ZVE32F-NEXT: vwmaccu.vx v10, s1, v8 -; RV32ZVE32F-NEXT: bnez s2, .LBB47_10 +; RV32ZVE32F-NEXT: andi s1, a1, 1 +; RV32ZVE32F-NEXT: bnez s1, .LBB47_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB47_11 @@ -4798,7 +4798,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; ; RV64ZVE32F-LABEL: 
mscatter_baseidx_zext_v8i16_v8i64: ; RV64ZVE32F: # %bb.0: -; RV64ZVE32F-NEXT: ld a5, 40(a0) +; RV64ZVE32F-NEXT: ld a4, 40(a0) ; RV64ZVE32F-NEXT: ld a3, 48(a0) ; RV64ZVE32F-NEXT: ld a2, 56(a0) ; RV64ZVE32F-NEXT: ld t1, 8(a0) @@ -4806,8 +4806,8 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: ld a7, 24(a0) ; RV64ZVE32F-NEXT: ld a6, 32(a0) ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; RV64ZVE32F-NEXT: vmv.x.s a4, v0 -; RV64ZVE32F-NEXT: andi t2, a4, 1 +; RV64ZVE32F-NEXT: vmv.x.s a5, v0 +; RV64ZVE32F-NEXT: andi t2, a5, 1 ; RV64ZVE32F-NEXT: beqz t2, .LBB47_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.store ; RV64ZVE32F-NEXT: ld a0, 0(a0) @@ -4818,7 +4818,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add t2, a1, t2 ; RV64ZVE32F-NEXT: sd a0, 0(t2) ; RV64ZVE32F-NEXT: .LBB47_2: # %else -; RV64ZVE32F-NEXT: andi a0, a4, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 2 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.store1 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -4831,18 +4831,18 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB47_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a4, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_13 ; RV64ZVE32F-NEXT: .LBB47_6: # %else6 -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_14 ; RV64ZVE32F-NEXT: .LBB47_7: # %else8 -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_9 ; RV64ZVE32F-NEXT: .LBB47_8: # %cond.store9 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 1 @@ -4850,13 +4850,13 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: slli a0, a0, 48 ; RV64ZVE32F-NEXT: srli a0, a0, 45 ; RV64ZVE32F-NEXT: add a0, a1, a0 -; RV64ZVE32F-NEXT: sd a5, 0(a0) +; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB47_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a4, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_16 ; RV64ZVE32F-NEXT: .LBB47_11: # %else14 ; RV64ZVE32F-NEXT: ret @@ -4866,7 +4866,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: srli a0, a0, 45 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd t0, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 8 +; RV64ZVE32F-NEXT: andi a0, a5, 8 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_6 ; RV64ZVE32F-NEXT: .LBB47_13: # %cond.store5 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4875,7 +4875,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: srli a0, a0, 45 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a7, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 16 +; RV64ZVE32F-NEXT: andi a0, a5, 16 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_7 ; RV64ZVE32F-NEXT: .LBB47_14: # %cond.store7 ; RV64ZVE32F-NEXT: vmv.x.s a0, v9 @@ 
-4883,7 +4883,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: srli a0, a0, 45 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a6, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, 32 +; RV64ZVE32F-NEXT: andi a0, a5, 32 ; RV64ZVE32F-NEXT: bnez a0, .LBB47_8 ; RV64ZVE32F-NEXT: j .LBB47_9 ; RV64ZVE32F-NEXT: .LBB47_15: # %cond.store11 @@ -4892,7 +4892,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: srli a0, a0, 45 ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a3, 0(a0) -; RV64ZVE32F-NEXT: andi a0, a4, -128 +; RV64ZVE32F-NEXT: andi a0, a5, -128 ; RV64ZVE32F-NEXT: beqz a0, .LBB47_11 ; RV64ZVE32F-NEXT: .LBB47_16: # %cond.store13 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 1 @@ -4943,42 +4943,41 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: lw a7, 44(a0) ; RV32ZVE32F-NEXT: lw a4, 48(a0) ; RV32ZVE32F-NEXT: lw a5, 52(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 28(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 36(a0) -; RV32ZVE32F-NEXT: lw s0, 8(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw t6, 20(a0) +; RV32ZVE32F-NEXT: lw t2, 24(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t0, 32(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw s0, 12(a0) +; RV32ZVE32F-NEXT: lw t4, 16(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi s2, t0, 1 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez s2, .LBB48_10 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi s1, a1, 1 +; RV32ZVE32F-NEXT: bnez s1, .LBB48_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, t0, 2 +; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_11 ; RV32ZVE32F-NEXT: .LBB48_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, t0, 4 +; RV32ZVE32F-NEXT: andi a0, a1, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_12 ; RV32ZVE32F-NEXT: .LBB48_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, t0, 8 +; RV32ZVE32F-NEXT: andi a0, a1, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_13 ; RV32ZVE32F-NEXT: .LBB48_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: andi a0, a1, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_14 ; RV32ZVE32F-NEXT: .LBB48_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, t0, 32 +; RV32ZVE32F-NEXT: andi a0, a1, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_15 ; RV32ZVE32F-NEXT: .LBB48_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, t0, 64 +; RV32ZVE32F-NEXT: andi a0, a1, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_16 ; RV32ZVE32F-NEXT: .LBB48_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, t0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_9 ; RV32ZVE32F-NEXT: .LBB48_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -4998,44 +4997,45 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB48_10: # %cond.store ; RV32ZVE32F-NEXT: .cfi_restore_state -; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw s1, 0(a0) ; RV32ZVE32F-NEXT: lw a0, 4(a0) +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; 
RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw s1, 0(s2) ; RV32ZVE32F-NEXT: sw a0, 4(s2) -; RV32ZVE32F-NEXT: andi a0, t0, 2 +; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_2 ; RV32ZVE32F-NEXT: .LBB48_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 0(a0) -; RV32ZVE32F-NEXT: sw s1, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 4 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw s0, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_3 ; RV32ZVE32F-NEXT: .LBB48_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 8 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_4 ; RV32ZVE32F-NEXT: .LBB48_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_5 ; RV32ZVE32F-NEXT: .LBB48_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 32 +; RV32ZVE32F-NEXT: sw t0, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_6 ; RV32ZVE32F-NEXT: .LBB48_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5043,7 +5043,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a6, 0(a0) ; RV32ZVE32F-NEXT: sw a7, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 64 +; RV32ZVE32F-NEXT: andi a0, a1, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB48_7 ; RV32ZVE32F-NEXT: .LBB48_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5051,7 +5051,7 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a4, 0(a0) ; RV32ZVE32F-NEXT: sw a5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB48_8 ; RV32ZVE32F-NEXT: j .LBB48_9 ; @@ -5088,9 +5088,9 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: .LBB48_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a0, a5, 8 @@ -5108,8 +5108,8 @@ define void @mscatter_baseidx_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i32> % ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB48_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, 
v10, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB48_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a0, a5, -128 @@ -5193,42 +5193,41 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw a7, 44(a0) ; RV32ZVE32F-NEXT: lw a4, 48(a0) ; RV32ZVE32F-NEXT: lw a5, 52(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 28(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 36(a0) -; RV32ZVE32F-NEXT: lw s0, 8(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw t6, 20(a0) +; RV32ZVE32F-NEXT: lw t2, 24(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t0, 32(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw s0, 12(a0) +; RV32ZVE32F-NEXT: lw t4, 16(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi s2, t0, 1 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez s2, .LBB49_10 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi s1, a1, 1 +; RV32ZVE32F-NEXT: bnez s1, .LBB49_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, t0, 2 +; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_11 ; RV32ZVE32F-NEXT: .LBB49_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, t0, 4 +; RV32ZVE32F-NEXT: andi a0, a1, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_12 ; RV32ZVE32F-NEXT: .LBB49_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, t0, 8 +; RV32ZVE32F-NEXT: andi a0, a1, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_13 ; RV32ZVE32F-NEXT: .LBB49_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: andi a0, a1, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_14 ; RV32ZVE32F-NEXT: .LBB49_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, t0, 32 +; RV32ZVE32F-NEXT: andi a0, a1, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_15 ; RV32ZVE32F-NEXT: .LBB49_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, t0, 64 +; RV32ZVE32F-NEXT: andi a0, a1, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_16 ; RV32ZVE32F-NEXT: .LBB49_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, t0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_9 ; RV32ZVE32F-NEXT: .LBB49_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5248,44 +5247,45 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB49_10: # %cond.store ; RV32ZVE32F-NEXT: .cfi_restore_state -; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw s1, 0(a0) ; RV32ZVE32F-NEXT: lw a0, 4(a0) +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw s1, 0(s2) ; RV32ZVE32F-NEXT: sw a0, 4(s2) -; RV32ZVE32F-NEXT: andi a0, t0, 2 +; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_2 ; RV32ZVE32F-NEXT: .LBB49_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 0(a0) -; RV32ZVE32F-NEXT: sw s1, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 4 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw s0, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 ; RV32ZVE32F-NEXT: beqz 
a0, .LBB49_3 ; RV32ZVE32F-NEXT: .LBB49_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 8 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_4 ; RV32ZVE32F-NEXT: .LBB49_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_5 ; RV32ZVE32F-NEXT: .LBB49_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 32 +; RV32ZVE32F-NEXT: sw t0, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_6 ; RV32ZVE32F-NEXT: .LBB49_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5293,7 +5293,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a6, 0(a0) ; RV32ZVE32F-NEXT: sw a7, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 64 +; RV32ZVE32F-NEXT: andi a0, a1, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB49_7 ; RV32ZVE32F-NEXT: .LBB49_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5301,7 +5301,7 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a4, 0(a0) ; RV32ZVE32F-NEXT: sw a5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB49_8 ; RV32ZVE32F-NEXT: j .LBB49_9 ; @@ -5338,9 +5338,9 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB49_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a0, a5, 8 @@ -5358,8 +5358,8 @@ define void @mscatter_baseidx_sext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB49_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB49_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a0, a5, -128 @@ -5444,42 +5444,41 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: lw a7, 44(a0) ; RV32ZVE32F-NEXT: lw a4, 48(a0) ; RV32ZVE32F-NEXT: lw a5, 52(a0) -; RV32ZVE32F-NEXT: lw t3, 24(a0) -; RV32ZVE32F-NEXT: lw t4, 28(a0) -; RV32ZVE32F-NEXT: lw t1, 32(a0) -; RV32ZVE32F-NEXT: lw t2, 36(a0) -; RV32ZVE32F-NEXT: lw s0, 8(a0) -; RV32ZVE32F-NEXT: lw s1, 12(a0) -; RV32ZVE32F-NEXT: lw t5, 16(a0) -; RV32ZVE32F-NEXT: lw t6, 20(a0) +; 
RV32ZVE32F-NEXT: lw t2, 24(a0) +; RV32ZVE32F-NEXT: lw t3, 28(a0) +; RV32ZVE32F-NEXT: lw t0, 32(a0) +; RV32ZVE32F-NEXT: lw t1, 36(a0) +; RV32ZVE32F-NEXT: lw t6, 8(a0) +; RV32ZVE32F-NEXT: lw s0, 12(a0) +; RV32ZVE32F-NEXT: lw t4, 16(a0) +; RV32ZVE32F-NEXT: lw t5, 20(a0) ; RV32ZVE32F-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32ZVE32F-NEXT: vmv.x.s t0, v0 -; RV32ZVE32F-NEXT: andi s2, t0, 1 -; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez s2, .LBB50_10 +; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma +; RV32ZVE32F-NEXT: vmv.x.s a1, v0 +; RV32ZVE32F-NEXT: andi s1, a1, 1 +; RV32ZVE32F-NEXT: bnez s1, .LBB50_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else -; RV32ZVE32F-NEXT: andi a0, t0, 2 +; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_11 ; RV32ZVE32F-NEXT: .LBB50_2: # %else2 -; RV32ZVE32F-NEXT: andi a0, t0, 4 +; RV32ZVE32F-NEXT: andi a0, a1, 4 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_12 ; RV32ZVE32F-NEXT: .LBB50_3: # %else4 -; RV32ZVE32F-NEXT: andi a0, t0, 8 +; RV32ZVE32F-NEXT: andi a0, a1, 8 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_13 ; RV32ZVE32F-NEXT: .LBB50_4: # %else6 -; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: andi a0, a1, 16 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_14 ; RV32ZVE32F-NEXT: .LBB50_5: # %else8 -; RV32ZVE32F-NEXT: andi a0, t0, 32 +; RV32ZVE32F-NEXT: andi a0, a1, 32 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_15 ; RV32ZVE32F-NEXT: .LBB50_6: # %else10 -; RV32ZVE32F-NEXT: andi a0, t0, 64 +; RV32ZVE32F-NEXT: andi a0, a1, 64 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_16 ; RV32ZVE32F-NEXT: .LBB50_7: # %else12 -; RV32ZVE32F-NEXT: andi a0, t0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_9 ; RV32ZVE32F-NEXT: .LBB50_8: # %cond.store13 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5499,44 +5498,45 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: ret ; RV32ZVE32F-NEXT: .LBB50_10: # %cond.store ; RV32ZVE32F-NEXT: .cfi_restore_state -; RV32ZVE32F-NEXT: lw a1, 0(a0) +; RV32ZVE32F-NEXT: lw s1, 0(a0) ; RV32ZVE32F-NEXT: lw a0, 4(a0) +; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s s2, v8 -; RV32ZVE32F-NEXT: sw a1, 0(s2) +; RV32ZVE32F-NEXT: sw s1, 0(s2) ; RV32ZVE32F-NEXT: sw a0, 4(s2) -; RV32ZVE32F-NEXT: andi a0, t0, 2 +; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_2 ; RV32ZVE32F-NEXT: .LBB50_11: # %cond.store1 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 1 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw s0, 0(a0) -; RV32ZVE32F-NEXT: sw s1, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 4 +; RV32ZVE32F-NEXT: sw t6, 0(a0) +; RV32ZVE32F-NEXT: sw s0, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 4 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_3 ; RV32ZVE32F-NEXT: .LBB50_12: # %cond.store3 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 2 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t5, 0(a0) -; RV32ZVE32F-NEXT: sw t6, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 8 +; RV32ZVE32F-NEXT: sw t4, 0(a0) +; RV32ZVE32F-NEXT: sw t5, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 8 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_4 ; RV32ZVE32F-NEXT: .LBB50_13: # %cond.store5 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 3 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t3, 
0(a0) -; RV32ZVE32F-NEXT: sw t4, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 16 +; RV32ZVE32F-NEXT: sw t2, 0(a0) +; RV32ZVE32F-NEXT: sw t3, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 16 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_5 ; RV32ZVE32F-NEXT: .LBB50_14: # %cond.store7 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vslidedown.vi v10, v8, 4 ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 -; RV32ZVE32F-NEXT: sw t1, 0(a0) -; RV32ZVE32F-NEXT: sw t2, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 32 +; RV32ZVE32F-NEXT: sw t0, 0(a0) +; RV32ZVE32F-NEXT: sw t1, 4(a0) +; RV32ZVE32F-NEXT: andi a0, a1, 32 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_6 ; RV32ZVE32F-NEXT: .LBB50_15: # %cond.store9 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5544,7 +5544,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a6, 0(a0) ; RV32ZVE32F-NEXT: sw a7, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, 64 +; RV32ZVE32F-NEXT: andi a0, a1, 64 ; RV32ZVE32F-NEXT: beqz a0, .LBB50_7 ; RV32ZVE32F-NEXT: .LBB50_16: # %cond.store11 ; RV32ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma @@ -5552,7 +5552,7 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV32ZVE32F-NEXT: vmv.x.s a0, v10 ; RV32ZVE32F-NEXT: sw a4, 0(a0) ; RV32ZVE32F-NEXT: sw a5, 4(a0) -; RV32ZVE32F-NEXT: andi a0, t0, -128 +; RV32ZVE32F-NEXT: andi a0, a1, -128 ; RV32ZVE32F-NEXT: bnez a0, .LBB50_8 ; RV32ZVE32F-NEXT: j .LBB50_9 ; @@ -5591,9 +5591,9 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: .LBB50_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 4 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a0, a5, 8 @@ -5612,8 +5612,8 @@ define void @mscatter_baseidx_zext_v8i32_v8i64(<8 x i64> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: add a0, a1, a0 ; RV64ZVE32F-NEXT: sd a4, 0(a0) ; RV64ZVE32F-NEXT: .LBB50_9: # %else10 -; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a0, a5, 64 ; RV64ZVE32F-NEXT: bnez a0, .LBB50_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a0, a5, -128 @@ -5745,9 +5745,9 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s4 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, s5 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: andi s2, a2, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a1 -; RV32ZVE32F-NEXT: bnez s2, .LBB51_10 +; RV32ZVE32F-NEXT: andi a1, a2, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB51_10 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a2, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB51_11 @@ -5928,8 +5928,8 @@ define void @mscatter_baseidx_v8i64(<8 x i64> %val, ptr %base, <8 x i64> %idxs, ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB51_10: # %cond.store ; RV64ZVE32F-NEXT: .cfi_restore_state -; RV64ZVE32F-NEXT: ld a2, 0(a2) ; RV64ZVE32F-NEXT: ld a0, 0(a0) +; RV64ZVE32F-NEXT: ld a2, 0(a2) ; RV64ZVE32F-NEXT: slli a2, a2, 3 ; RV64ZVE32F-NEXT: add a2, a1, a2 ; RV64ZVE32F-NEXT: sd a0, 0(a2) @@ -6350,9 +6350,9 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> ; RV64ZVE32F-NEXT: .LBB58_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, 
ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -6375,9 +6375,9 @@ define void @mscatter_baseidx_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i8> ; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 ; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB58_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB58_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -6501,9 +6501,9 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB59_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -6526,9 +6526,9 @@ define void @mscatter_baseidx_sext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 ; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB59_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB59_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -6653,9 +6653,9 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB60_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -6679,9 +6679,9 @@ define void @mscatter_baseidx_zext_v8i8_v8bf16(<8 x bfloat> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 ; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB60_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB60_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -6809,9 +6809,9 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id ; RV64ZVE32F-NEXT: .LBB61_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -6834,9 +6834,9 @@ define void @mscatter_baseidx_v8bf16(<8 x bfloat> %val, ptr %base, <8 x i16> %id ; RV64ZVE32F-NEXT: fmv.h.x fa5, a3 ; RV64ZVE32F-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB61_9: # %else10 
-; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB61_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -7453,9 +7453,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-ZVFH-NEXT: .LBB68_4: # %else2 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_12 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 @@ -7476,9 +7476,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-ZVFH-NEXT: .LBB68_9: # %else10 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB68_15 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 @@ -7567,9 +7567,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_4: # %else2 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_12 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 @@ -7592,9 +7592,9 @@ define void @mscatter_baseidx_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i8> %i ; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 ; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB68_9: # %else10 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB68_15 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 @@ -7714,9 +7714,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-ZVFH-NEXT: .LBB69_4: # %else2 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_12 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 @@ -7737,9 +7737,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-ZVFH-NEXT: .LBB69_9: # %else10 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 
64 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB69_15 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 @@ -7828,9 +7828,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_4: # %else2 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_12 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 @@ -7853,9 +7853,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 ; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB69_9: # %else10 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB69_15 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 @@ -7976,9 +7976,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-ZVFH-NEXT: .LBB70_4: # %else2 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_12 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 @@ -8000,9 +8000,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-ZVFH-NEXT: .LBB70_9: # %else10 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB70_15 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 @@ -8098,9 +8098,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_4: # %else2 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_12 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 @@ -8124,9 +8124,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f16(<8 x half> %val, ptr %base, <8 x i ; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 ; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB70_9: # %else10 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB70_15 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12 ; 
RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 @@ -8250,9 +8250,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-ZVFH-NEXT: .LBB71_4: # %else2 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_12 ; RV64ZVE32F-ZVFH-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 8 @@ -8273,9 +8273,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v8, 5 ; RV64ZVE32F-ZVFH-NEXT: vse16.v v9, (a2) ; RV64ZVE32F-ZVFH-NEXT: .LBB71_9: # %else10 -; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-ZVFH-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFH-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFH-NEXT: bnez a2, .LBB71_15 ; RV64ZVE32F-ZVFH-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-ZVFH-NEXT: andi a1, a1, -128 @@ -8363,9 +8363,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_4: # %else2 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 4 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_12 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 8 @@ -8388,9 +8388,9 @@ define void @mscatter_baseidx_v8f16(<8 x half> %val, ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-ZVFHMIN-NEXT: fmv.h.x fa5, a3 ; RV64ZVE32F-ZVFHMIN-NEXT: fsh fa5, 0(a2) ; RV64ZVE32F-ZVFHMIN-NEXT: .LBB71_9: # %else10 -; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-ZVFHMIN-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-ZVFHMIN-NEXT: andi a2, a1, 64 ; RV64ZVE32F-ZVFHMIN-NEXT: bnez a2, .LBB71_15 ; RV64ZVE32F-ZVFHMIN-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-ZVFHMIN-NEXT: andi a1, a1, -128 @@ -8795,9 +8795,9 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: .LBB78_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -8819,9 +8819,9 @@ define void @mscatter_baseidx_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x i8> % ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB78_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -8932,9 +8932,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB79_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, 
ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB79_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -8956,9 +8956,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB79_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB79_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -9073,9 +9073,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB80_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -9098,9 +9098,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB80_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB80_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -9219,9 +9219,9 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: .LBB81_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -9243,9 +9243,9 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB81_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB81_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -9358,9 +9358,9 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB82_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -9382,9 +9382,9 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli 
zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB82_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB82_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -9500,9 +9500,9 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB83_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -9525,9 +9525,9 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB83_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB83_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -9641,9 +9641,9 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: .LBB84_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -9665,9 +9665,9 @@ define void @mscatter_baseidx_v8f32(<8 x float> %val, ptr %base, <8 x i32> %idxs ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v10, (a2) ; RV64ZVE32F-NEXT: .LBB84_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v12, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB84_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -10179,9 +10179,9 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB91_9 +; RV32ZVE32F-NEXT: andi a0, a1, 1 +; RV32ZVE32F-NEXT: bnez a0, .LBB91_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB91_10 @@ -10283,9 +10283,9 @@ define void @mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV64ZVE32F-NEXT: .LBB91_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -10303,8 +10303,8 @@ define void 
@mscatter_baseidx_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x i8> ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB91_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB91_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -10379,9 +10379,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB92_9 +; RV32ZVE32F-NEXT: andi a0, a1, 1 +; RV32ZVE32F-NEXT: bnez a0, .LBB92_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB92_10 @@ -10483,9 +10483,9 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB92_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -10503,8 +10503,8 @@ define void @mscatter_baseidx_sext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB92_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB92_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -10581,9 +10581,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vsll.vi v8, v10, 3 -; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB93_9 +; RV32ZVE32F-NEXT: andi a0, a1, 1 +; RV32ZVE32F-NEXT: bnez a0, .LBB93_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB93_10 @@ -10687,9 +10687,9 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: .LBB93_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB93_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -10708,8 +10708,8 @@ define void @mscatter_baseidx_zext_v8i8_v8f64(<8 x double> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB93_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB93_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -10789,10 +10789,10 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV32ZVE32F-NEXT: li a1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; 
RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a2, a0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8 -; RV32ZVE32F-NEXT: bnez a2, .LBB94_9 +; RV32ZVE32F-NEXT: andi a1, a0, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB94_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: bnez a1, .LBB94_10 @@ -10896,9 +10896,9 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV64ZVE32F-NEXT: .LBB94_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB94_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -10916,8 +10916,8 @@ define void @mscatter_baseidx_v8i16_v8f64(<8 x double> %val, ptr %base, <8 x i16 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB94_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB94_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -10991,10 +10991,10 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-NEXT: li a1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a2, a0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32ZVE32F-NEXT: vwmaccus.vx v10, a1, v8 -; RV32ZVE32F-NEXT: bnez a2, .LBB95_9 +; RV32ZVE32F-NEXT: andi a1, a0, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB95_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: bnez a1, .LBB95_10 @@ -11098,9 +11098,9 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB95_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB95_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -11118,8 +11118,8 @@ define void @mscatter_baseidx_sext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB95_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB95_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -11195,10 +11195,10 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-NEXT: li a1, 8 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a0, v0 -; RV32ZVE32F-NEXT: andi a2, a0, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32ZVE32F-NEXT: vwmaccu.vx v10, a1, v8 -; RV32ZVE32F-NEXT: bnez a2, .LBB96_9 +; RV32ZVE32F-NEXT: andi a1, a0, 1 +; RV32ZVE32F-NEXT: bnez a1, .LBB96_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a1, a0, 2 ; RV32ZVE32F-NEXT: bnez a1, .LBB96_10 @@ -11304,9 +11304,9 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; 
RV64ZVE32F-NEXT: .LBB96_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB96_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -11325,8 +11325,8 @@ define void @mscatter_baseidx_zext_v8i16_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB96_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB96_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -11405,10 +11405,10 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB97_9 +; RV32ZVE32F-NEXT: andi a0, a1, 1 +; RV32ZVE32F-NEXT: bnez a0, .LBB97_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB97_10 @@ -11511,9 +11511,9 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV64ZVE32F-NEXT: .LBB97_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -11531,8 +11531,8 @@ define void @mscatter_baseidx_v8i32_v8f64(<8 x double> %val, ptr %base, <8 x i32 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB97_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB97_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -11605,10 +11605,10 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB98_9 +; RV32ZVE32F-NEXT: andi a0, a1, 1 +; RV32ZVE32F-NEXT: bnez a0, .LBB98_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB98_10 @@ -11711,9 +11711,9 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB98_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -11731,8 +11731,8 @@ define void @mscatter_baseidx_sext_v8i32_v8f64(<8 x 
double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB98_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB98_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -11806,10 +11806,10 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; RV32ZVE32F-NEXT: vmv.x.s a1, v0 -; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB99_9 +; RV32ZVE32F-NEXT: andi a0, a1, 1 +; RV32ZVE32F-NEXT: bnez a0, .LBB99_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB99_10 @@ -11914,9 +11914,9 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: .LBB99_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v8, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v8, v8, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB99_12 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -11935,8 +11935,8 @@ define void @mscatter_baseidx_zext_v8i32_v8f64(<8 x double> %val, ptr %base, <8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: fsd fa5, 0(a2) ; RV64ZVE32F-NEXT: .LBB99_9: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vslidedown.vi v8, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB99_15 ; RV64ZVE32F-NEXT: # %bb.10: # %else12 ; RV64ZVE32F-NEXT: andi a1, a1, -128 @@ -12031,9 +12031,9 @@ define void @mscatter_baseidx_v8f64(<8 x double> %val, ptr %base, <8 x i64> %idx ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a4 ; RV32ZVE32F-NEXT: vslide1down.vx v8, v8, a5 ; RV32ZVE32F-NEXT: vsll.vi v8, v8, 3 -; RV32ZVE32F-NEXT: andi a2, a1, 1 ; RV32ZVE32F-NEXT: vadd.vx v8, v8, a0 -; RV32ZVE32F-NEXT: bnez a2, .LBB100_9 +; RV32ZVE32F-NEXT: andi a0, a1, 1 +; RV32ZVE32F-NEXT: bnez a0, .LBB100_9 ; RV32ZVE32F-NEXT: # %bb.1: # %else ; RV32ZVE32F-NEXT: andi a0, a1, 2 ; RV32ZVE32F-NEXT: bnez a0, .LBB100_10 @@ -12244,9 +12244,9 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB101_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB101_25 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -12261,9 +12261,9 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 4 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB101_8: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 8 +; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB101_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -12274,9 +12274,9 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, 
<16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB101_10: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: bnez a2, .LBB101_27 ; RV64ZVE32F-NEXT: # %bb.11: # %else12 ; RV64ZVE32F-NEXT: andi a2, a1, 128 @@ -12298,9 +12298,9 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB101_15: # %else18 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v9, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 1024 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v9, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 1024 ; RV64ZVE32F-NEXT: bnez a2, .LBB101_30 ; RV64ZVE32F-NEXT: # %bb.16: # %else20 ; RV64ZVE32F-NEXT: slli a2, a1, 52 @@ -12320,9 +12320,9 @@ define void @mscatter_baseidx_v16i8(<16 x i8> %val, ptr %base, <16 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v9, v8, 13 ; RV64ZVE32F-NEXT: vse8.v v9, (a2) ; RV64ZVE32F-NEXT: .LBB101_20: # %else26 -; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v9, v10, 2 +; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: bgez a2, .LBB101_22 ; RV64ZVE32F-NEXT: # %bb.21: # %cond.store27 ; RV64ZVE32F-NEXT: vmv.x.s a2, v9 @@ -12443,11 +12443,11 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64V-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64V-NEXT: vslidedown.vi v8, v8, 16 ; RV64V-NEXT: vslidedown.vi v10, v10, 16 -; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV64V-NEXT: vslidedown.vi v0, v0, 2 ; RV64V-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64V-NEXT: vsext.vf8 v16, v10 -; RV64V-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; RV64V-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64V-NEXT: vslidedown.vi v0, v0, 2 +; RV64V-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64V-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64V-NEXT: ret ; @@ -12476,9 +12476,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB102_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v10, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: bnez a2, .LBB102_49 ; RV64ZVE32F-NEXT: # %bb.5: # %else4 ; RV64ZVE32F-NEXT: andi a2, a1, 8 @@ -12493,9 +12493,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB102_8: # %else8 -; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 8 +; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: beqz a2, .LBB102_10 ; RV64ZVE32F-NEXT: # %bb.9: # %cond.store9 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -12506,9 +12506,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v14, v8, 5 ; RV64ZVE32F-NEXT: vse8.v v14, (a2) ; RV64ZVE32F-NEXT: .LBB102_10: # %else10 -; RV64ZVE32F-NEXT: andi a2, a1, 64 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v13, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 
64 ; RV64ZVE32F-NEXT: bnez a2, .LBB102_51 ; RV64ZVE32F-NEXT: # %bb.11: # %else12 ; RV64ZVE32F-NEXT: andi a2, a1, 128 @@ -12530,9 +12530,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB102_15: # %else18 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v13, v12, 4 -; RV64ZVE32F-NEXT: andi a2, a1, 1024 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v12, 2 +; RV64ZVE32F-NEXT: andi a2, a1, 1024 ; RV64ZVE32F-NEXT: beqz a2, .LBB102_17 ; RV64ZVE32F-NEXT: # %bb.16: # %cond.store19 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 @@ -12552,9 +12552,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 11 ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB102_19: # %else22 -; RV64ZVE32F-NEXT: slli a2, a1, 51 ; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 16 +; RV64ZVE32F-NEXT: slli a2, a1, 51 ; RV64ZVE32F-NEXT: bgez a2, .LBB102_21 ; RV64ZVE32F-NEXT: # %bb.20: # %cond.store23 ; RV64ZVE32F-NEXT: vmv.x.s a2, v13 @@ -12574,9 +12574,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vslidedown.vi v11, v8, 13 ; RV64ZVE32F-NEXT: vse8.v v11, (a2) ; RV64ZVE32F-NEXT: .LBB102_23: # %else26 -; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v13, 2 +; RV64ZVE32F-NEXT: slli a2, a1, 49 ; RV64ZVE32F-NEXT: bltz a2, .LBB102_54 ; RV64ZVE32F-NEXT: # %bb.24: # %else28 ; RV64ZVE32F-NEXT: slli a2, a1, 48 @@ -12599,9 +12599,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB102_28: # %else34 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: slli a2, a1, 45 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v10, 2 +; RV64ZVE32F-NEXT: slli a2, a1, 45 ; RV64ZVE32F-NEXT: bltz a2, .LBB102_57 ; RV64ZVE32F-NEXT: # %bb.29: # %else36 ; RV64ZVE32F-NEXT: slli a2, a1, 44 @@ -12617,9 +12617,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB102_32: # %else40 -; RV64ZVE32F-NEXT: slli a2, a1, 42 ; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 8 +; RV64ZVE32F-NEXT: slli a2, a1, 42 ; RV64ZVE32F-NEXT: bgez a2, .LBB102_34 ; RV64ZVE32F-NEXT: # %bb.33: # %cond.store41 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -12631,9 +12631,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB102_34: # %else42 -; RV64ZVE32F-NEXT: slli a2, a1, 41 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v11, 2 +; RV64ZVE32F-NEXT: slli a2, a1, 41 ; RV64ZVE32F-NEXT: bltz a2, .LBB102_59 ; RV64ZVE32F-NEXT: # %bb.35: # %else44 ; RV64ZVE32F-NEXT: slli a2, a1, 40 @@ -12656,9 +12656,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: .LBB102_39: # %else50 ; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v11, v10, 4 -; RV64ZVE32F-NEXT: slli a2, a1, 37 ; 
RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v10, 2 +; RV64ZVE32F-NEXT: slli a2, a1, 37 ; RV64ZVE32F-NEXT: bltz a2, .LBB102_62 ; RV64ZVE32F-NEXT: # %bb.40: # %else52 ; RV64ZVE32F-NEXT: slli a2, a1, 36 @@ -12679,9 +12679,9 @@ define void @mscatter_baseidx_v32i8(<32 x i8> %val, ptr %base, <32 x i8> %idxs, ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vse8.v v12, (a2) ; RV64ZVE32F-NEXT: .LBB102_44: # %else58 -; RV64ZVE32F-NEXT: slli a2, a1, 33 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v10, v11, 2 +; RV64ZVE32F-NEXT: slli a2, a1, 33 ; RV64ZVE32F-NEXT: bgez a2, .LBB102_46 ; RV64ZVE32F-NEXT: # %bb.45: # %cond.store59 ; RV64ZVE32F-NEXT: vmv.x.s a2, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll index ed6ec4d5659b1..6421d7c8022f4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-fp.ll @@ -242,9 +242,9 @@ define void @masked_store_v32f64(<32 x double> %val, ptr %a, <32 x i1> %mask) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -278,12 +278,12 @@ define void @masked_store_v64f32(<64 x float> %val, ptr %a, <64 x i1> %mask) { ; CHECK-LABEL: masked_store_v64f32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vse32.v v16, (a0), v0.t ; CHECK-NEXT: ret call void @llvm.masked.store.v64f32.p0(<64 x float> %val, ptr %a, i32 8, <64 x i1> %mask) @@ -294,12 +294,12 @@ define void @masked_store_v128bf16(<128 x bfloat> %val, ptr %a, <128 x i1> %mask ; CHECK-LABEL: masked_store_v128bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v16, (a0), v0.t ; CHECK-NEXT: ret call void @llvm.masked.store.v128bf16.p0(<128 x bfloat> %val, ptr %a, i32 8, <128 x i1> %mask) @@ -310,12 +310,12 @@ define void @masked_store_v128f16(<128 x half> %val, ptr %a, <128 x i1> %mask) { ; CHECK-LABEL: masked_store_v128f16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v16, (a0), v0.t ; CHECK-NEXT: ret 
call void @llvm.masked.store.v128f16.p0(<128 x half> %val, ptr %a, i32 8, <128 x i1> %mask) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll index c3b10db115bae..7a9fc0ecd8bb0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-store-int.ll @@ -240,9 +240,9 @@ define void @masked_store_v32i64(<32 x i64> %val, ptr %a, <32 x i1> %mask) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a0), v0.t +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -276,12 +276,12 @@ define void @masked_store_v64i32(<64 x i32> %val, ptr %a, <64 x i1> %mask) { ; CHECK-LABEL: masked_store_v64i32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vse32.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 4 +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vse32.v v16, (a0), v0.t ; CHECK-NEXT: ret call void @llvm.masked.store.v64i32.p0(<64 x i32> %val, ptr %a, i32 8, <64 x i1> %mask) @@ -303,12 +303,12 @@ define void @masked_store_v128i16(<128 x i16> %val, ptr %a, <128 x i1> %mask) { ; CHECK-LABEL: masked_store_v128i16: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 64 -; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 8 ; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v8, (a0), v0.t ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 8 +; CHECK-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; CHECK-NEXT: vse16.v v16, (a0), v0.t ; CHECK-NEXT: ret call void @llvm.masked.store.v128i16.p0(<128 x i16> %val, ptr %a, i32 8, <128 x i1> %mask) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll index fe65da0d330f1..68e218fcad062 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-nearbyint-vp.ll @@ -9,19 +9,19 @@ declare <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_nearbyint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v2f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI0_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> %va, <2 x i1> %m, i32 %evl) ret 
<2 x half> %v @@ -30,17 +30,17 @@ define <2 x half> @vp_nearbyint_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext % define <2 x half> @vp_nearbyint_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v2f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x half> @llvm.vp.nearbyint.v2f16(<2 x half> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x half> %v @@ -51,19 +51,19 @@ declare <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half>, <4 x i1>, i32) define <4 x half> @vp_nearbyint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v4f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI2_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> %va, <4 x i1> %m, i32 %evl) ret <4 x half> %v @@ -72,17 +72,17 @@ define <4 x half> @vp_nearbyint_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext % define <4 x half> @vp_nearbyint_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v4f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI3_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x half> @llvm.vp.nearbyint.v4f16(<4 x half> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x half> %v @@ -93,19 +93,19 @@ declare <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_nearbyint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v8f16: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI4_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, 
ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> %va, <8 x i1> %m, i32 %evl) ret <8 x half> %v @@ -114,17 +114,17 @@ define <8 x half> @vp_nearbyint_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % define <8 x half> @vp_nearbyint_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v8f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI5_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x half> @llvm.vp.nearbyint.v8f16(<8 x half> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x half> %v @@ -137,19 +137,19 @@ define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI6_0) ; CHECK-NEXT: flh fa5, %lo(.LCPI6_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> %va, <16 x i1> %m, i32 %evl) ret <16 x half> %v @@ -158,17 +158,17 @@ define <16 x half> @vp_nearbyint_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe define <16 x half> @vp_nearbyint_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v16f16_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI7_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI7_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x half> @llvm.vp.nearbyint.v16f16(<16 x half> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x half> %v @@ -183,15 +183,15 @@ define <2 x float> @vp_nearbyint_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; 
CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> %va, <2 x i1> %m, i32 %evl) ret <2 x float> %v @@ -204,13 +204,13 @@ define <2 x float> @vp_nearbyint_v2f32_unmasked(<2 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x float> @llvm.vp.nearbyint.v2f32(<2 x float> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x float> %v @@ -225,15 +225,15 @@ define <4 x float> @vp_nearbyint_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> %va, <4 x i1> %m, i32 %evl) ret <4 x float> %v @@ -246,13 +246,13 @@ define <4 x float> @vp_nearbyint_v4f32_unmasked(<4 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x float> @llvm.vp.nearbyint.v4f32(<4 x float> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x float> %v @@ -268,16 +268,16 @@ define <8 x float> @vp_nearbyint_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> %va, <8 x i1> %m, i32 %evl) ret <8 x float> %v @@ -290,13 +290,13 @@ define <8 x float> @vp_nearbyint_v8f32_unmasked(<8 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; 
CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x float> @llvm.vp.nearbyint.v8f32(<8 x float> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x float> %v @@ -312,16 +312,16 @@ define <16 x float> @vp_nearbyint_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> %va, <16 x i1> %m, i32 %evl) ret <16 x float> %v @@ -334,13 +334,13 @@ define <16 x float> @vp_nearbyint_v16f32_unmasked(<16 x float> %va, i32 zeroext ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x float> @llvm.vp.nearbyint.v16f32(<16 x float> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x float> %v @@ -351,19 +351,19 @@ declare <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_nearbyint_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI16_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 x double> @llvm.vp.nearbyint.v2f64(<2 x double> %va, <2 x i1> %m, i32 %evl) ret <2 x double> %v @@ -372,17 +372,17 @@ define <2 x double> @vp_nearbyint_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroe define <2 x double> @vp_nearbyint_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <2 
x double> @llvm.vp.nearbyint.v2f64(<2 x double> %va, <2 x i1> splat (i1 true), i32 %evl) ret <2 x double> %v @@ -395,19 +395,19 @@ define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> %m, i32 %evl) ret <4 x double> %v @@ -416,17 +416,17 @@ define <4 x double> @vp_nearbyint_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroe define <4 x double> @vp_nearbyint_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <4 x double> @llvm.vp.nearbyint.v4f64(<4 x double> %va, <4 x i1> splat (i1 true), i32 %evl) ret <4 x double> %v @@ -439,19 +439,19 @@ define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> %m, i32 %evl) ret <8 x double> %v @@ -460,17 +460,17 @@ define <8 x double> @vp_nearbyint_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroe define <8 x double> @vp_nearbyint_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI21_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t 
; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <8 x double> @llvm.vp.nearbyint.v8f64(<8 x double> %va, <8 x i1> splat (i1 true), i32 %evl) ret <8 x double> %v @@ -483,19 +483,19 @@ define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 z ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> %m, i32 %evl) ret <15 x double> %v @@ -504,17 +504,17 @@ define <15 x double> @vp_nearbyint_v15f64(<15 x double> %va, <15 x i1> %m, i32 z define <15 x double> @vp_nearbyint_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v15f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI23_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <15 x double> @llvm.vp.nearbyint.v15f64(<15 x double> %va, <15 x i1> splat (i1 true), i32 %evl) ret <15 x double> %v @@ -527,19 +527,19 @@ define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 z ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> %m, i32 %evl) ret <16 x double> %v @@ -548,17 +548,17 @@ define <16 x double> @vp_nearbyint_v16f64(<16 x double> %va, <16 x i1> %m, i32 z define <16 x double> @vp_nearbyint_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_v16f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; 
CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI25_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call <16 x double> @llvm.vp.nearbyint.v16f64(<16 x double> %va, <16 x i1> splat (i1 true), i32 %evl) ret <16 x double> %v @@ -582,8 +582,8 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -598,9 +598,9 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t -; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -610,24 +610,24 @@ define <32 x double> @vp_nearbyint_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: frflags a1 ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: fsflags a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb @@ -674,9 +674,9 @@ define <32 x double> @vp_nearbyint_v32f64_unmasked(<32 x double> %va, i32 zeroex ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: fsflags a1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t -; CHECK-NEXT: fsflags a1 ; CHECK-NEXT: ret %v = call <32 x double> @llvm.vp.nearbyint.v32f64(<32 x double> %va, <32 x i1> splat (i1 true), i32 %evl) ret <32 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll index bf8baafc4a25d..ff6984eb82df1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-formation.ll @@ 
-122,9 +122,9 @@ define i32 @reduce_sum_16xi32_prefix3(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix3: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredsum.vs v8, v8, v9 +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vredsum.vs v8, v9, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -140,9 +140,9 @@ define i32 @reduce_sum_16xi32_prefix4(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix4: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vredsum.vs v8, v8, v9 +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vredsum.vs v8, v9, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -160,9 +160,9 @@ define i32 @reduce_sum_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix5: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredsum.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vredsum.vs v8, v10, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -182,9 +182,9 @@ define i32 @reduce_sum_16xi32_prefix6(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix6: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 6, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredsum.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vredsum.vs v8, v10, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -206,9 +206,9 @@ define i32 @reduce_sum_16xi32_prefix7(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix7: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 7, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredsum.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vredsum.vs v8, v10, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -232,9 +232,9 @@ define i32 @reduce_sum_16xi32_prefix8(ptr %p) { ; CHECK-LABEL: reduce_sum_16xi32_prefix8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredsum.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vredsum.vs v8, v10, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -492,9 +492,9 @@ define i32 @reduce_xor_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_xor_16xi32_prefix5: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredxor.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vredxor.vs v8, v10, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -568,9 +568,9 @@ define i32 @reduce_or_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_or_16xi32_prefix5: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredor.vs v8, v8, v10 +; 
CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vredor.vs v8, v10, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -609,11 +609,11 @@ define i32 @reduce_smax_16xi32_prefix2(ptr %p) { define i32 @reduce_smax_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_smax_16xi32_prefix5: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 524288 ; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vredmax.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vredmax.vs v8, v10, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -687,9 +687,9 @@ define i32 @reduce_umax_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_umax_16xi32_prefix5: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v10, zero -; CHECK-NEXT: vredmaxu.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vredmaxu.vs v8, v10, v8 ; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -734,11 +734,11 @@ define i32 @reduce_umin_16xi32_prefix5(ptr %p) { ; ; RV64-LABEL: reduce_umin_16xi32_prefix5: ; RV64: # %bb.0: +; RV64-NEXT: li a1, -1 ; RV64-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: li a0, -1 -; RV64-NEXT: vmv.s.x v10, a0 -; RV64-NEXT: vredminu.vs v8, v8, v10 +; RV64-NEXT: vmv.s.x v8, a1 +; RV64-NEXT: vle32.v v10, (a0) +; RV64-NEXT: vredminu.vs v8, v10, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret %v = load <16 x i32>, ptr %p, align 256 @@ -758,9 +758,9 @@ define float @reduce_fadd_16xf32_prefix2(ptr %p) { ; CHECK-LABEL: reduce_fadd_16xf32_prefix2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vmv.s.x v9, zero -; CHECK-NEXT: vfredusum.vs v8, v8, v9 +; CHECK-NEXT: vmv.s.x v8, zero +; CHECK-NEXT: vle32.v v9, (a0) +; CHECK-NEXT: vfredusum.vs v8, v9, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <16 x float>, ptr %p, align 256 @@ -773,11 +773,11 @@ define float @reduce_fadd_16xf32_prefix2(ptr %p) { define float @reduce_fadd_16xi32_prefix5(ptr %p) { ; CHECK-LABEL: reduce_fadd_16xi32_prefix5: ; CHECK: # %bb.0: +; CHECK-NEXT: lui a1, 524288 ; CHECK-NEXT: vsetivli zero, 5, e32, m2, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: lui a0, 524288 -; CHECK-NEXT: vmv.s.x v10, a0 -; CHECK-NEXT: vfredusum.vs v8, v8, v10 +; CHECK-NEXT: vmv.s.x v8, a1 +; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vfredusum.vs v8, v10, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <16 x float>, ptr %p, align 256 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll index 6684e6d223eac..c2cac3eeb7a46 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp-vp.ll @@ -121,9 +121,9 @@ declare float @llvm.vp.reduce.fadd.v64f32(float, <64 x float>, <64 x i1>, i32) define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_fadd_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 +; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: 
bltu a0, a2, .LBB8_2 ; CHECK-NEXT: # %bb.1: @@ -149,9 +149,9 @@ define float @vpreduce_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 define float @vpreduce_ord_fadd_v64f32(float %s, <64 x float> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_ord_fadd_v64f32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 +; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB9_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll index 2b279389253b0..23197ede1da49 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-fp.ll @@ -253,11 +253,11 @@ define half @vreduce_ord_fadd_v128f16(ptr %x, half %s) { ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: li a2, 64 ; CHECK-NEXT: vsetvli zero, a2, e16, m8, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v16, (a1) +; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vfmv.s.f v24, fa0 -; CHECK-NEXT: vfredosum.vs v8, v8, v24 -; CHECK-NEXT: vfredosum.vs v8, v16, v8 +; CHECK-NEXT: vfredosum.vs v16, v16, v24 +; CHECK-NEXT: vfredosum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <128 x half>, ptr %x @@ -744,11 +744,11 @@ define float @vreduce_ord_fadd_v64f32(ptr %x, float %s) { ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: li a2, 32 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v8, (a0) -; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: vfmv.s.f v24, fa0 -; CHECK-NEXT: vfredosum.vs v8, v8, v24 -; CHECK-NEXT: vfredosum.vs v8, v16, v8 +; CHECK-NEXT: vfredosum.vs v16, v16, v24 +; CHECK-NEXT: vfredosum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <64 x float>, ptr %x @@ -1135,11 +1135,11 @@ define double @vreduce_ord_fadd_v32f64(ptr %x, double %s) { ; CHECK: # %bb.0: ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vle64.v v8, (a1) +; CHECK-NEXT: vle64.v v16, (a0) ; CHECK-NEXT: vfmv.s.f v24, fa0 -; CHECK-NEXT: vfredosum.vs v8, v8, v24 -; CHECK-NEXT: vfredosum.vs v8, v16, v8 +; CHECK-NEXT: vfredosum.vs v16, v16, v24 +; CHECK-NEXT: vfredosum.vs v8, v8, v16 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret %v = load <32 x double>, ptr %x @@ -1344,17 +1344,17 @@ define float @vreduce_fmin_v128f32(ptr %x) { ; CHECK-LABEL: vreduce_fmin_v128f32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: addi a2, a0, 384 +; CHECK-NEXT: addi a2, a0, 256 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a2) -; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vle32.v v0, (a1) -; CHECK-NEXT: vfmin.vv v8, v24, v8 -; CHECK-NEXT: vfmin.vv v16, v16, v0 +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vle32.v v0, (a0) +; CHECK-NEXT: vfmin.vv v24, v0, v24 ; CHECK-NEXT: vfmin.vv v8, v16, v8 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1591,17 +1591,17 @@ define float @vreduce_fmax_v128f32(ptr %x) { ; CHECK-LABEL: vreduce_fmax_v128f32: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; 
CHECK-NEXT: addi a2, a0, 384 +; CHECK-NEXT: addi a2, a0, 256 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a2) -; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vle32.v v0, (a1) -; CHECK-NEXT: vfmax.vv v8, v24, v8 -; CHECK-NEXT: vfmax.vv v16, v16, v0 +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vle32.v v0, (a0) +; CHECK-NEXT: vfmax.vv v24, v0, v24 ; CHECK-NEXT: vfmax.vv v8, v16, v8 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -1997,59 +1997,56 @@ define float @vreduce_fminimum_v128f32(ptr %x) { ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: addi a2, a0, 128 +; CHECK-NEXT: addi a2, a0, 384 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v8, (a2) -; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vle32.v v16, (a2) +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle32.v v24, (a1) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v16, v8 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v16, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vfmin.vv v16, v8, v16 -; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: 
vmerge.vvm v24, v24, v8, v0 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vmfeq.vv v7, v16, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v24, v24, v8 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vfmin.vv v8, v16, v8 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; CHECK-NEXT: beqz a0, .LBB121_2 @@ -2077,17 +2074,17 @@ define float @vreduce_fminimum_v128f32_nonans(ptr %x) { ; CHECK-LABEL: vreduce_fminimum_v128f32_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: addi a2, a0, 384 +; CHECK-NEXT: addi a2, a0, 256 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a2) -; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vle32.v v0, (a1) -; CHECK-NEXT: vfmin.vv v8, v24, v8 -; CHECK-NEXT: vfmin.vv v16, v16, v0 +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vle32.v v0, (a0) +; CHECK-NEXT: vfmin.vv v24, v0, v24 ; CHECK-NEXT: vfmin.vv v8, v16, v8 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -2245,42 +2242,25 @@ declare double @llvm.vector.reduce.fminimum.v32f64(<32 x double>) define double @vreduce_fminimum_v32f64(ptr %x) { ; CHECK-LABEL: vreduce_fminimum_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; CHECK-NEXT: beqz a0, .LBB131_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI131_0) ; CHECK-NEXT: fld fa0, %lo(.LCPI131_0)(a0) -; CHECK-NEXT: j .LBB131_3 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB131_2: ; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: .LBB131_3: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = load <32 x double>, ptr %x %red = call double @llvm.vector.reduce.fminimum.v32f64(<32 x double> %v) @@ -2314,59 +2294,56 @@ define double @vreduce_fminimum_v64f64(ptr %x) { ; CHECK-NEXT: slli a1, 
a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vle64.v v8, (a1) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v16, v8 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v16, v16, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmfeq.vv v7, v8, v8 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vfmin.vv v16, v8, v16 -; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmin.vv v24, v24, v8 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vfmin.vv v8, v16, v8 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; CHECK-NEXT: beqz a0, .LBB133_2 @@ -2395,15 +2372,15 @@ define 
double @vreduce_fminimum_v64f64_nonans(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: vle64.v v0, (a1) -; CHECK-NEXT: vfmin.vv v16, v24, v16 -; CHECK-NEXT: vfmin.vv v8, v8, v0 +; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vle64.v v0, (a0) +; CHECK-NEXT: vfmin.vv v24, v0, v24 ; CHECK-NEXT: vfmin.vv v8, v8, v16 +; CHECK-NEXT: vfmin.vv v8, v8, v24 ; CHECK-NEXT: vfredmin.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -2711,59 +2688,56 @@ define float @vreduce_fmaximum_v128f32(ptr %x) { ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: addi a2, a0, 128 +; CHECK-NEXT: addi a2, a0, 384 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma -; CHECK-NEXT: vle32.v v8, (a2) -; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: vle32.v v16, (a2) +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle32.v v24, (a1) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v16, v8 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vle32.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v16, v0 ; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmfeq.vv v0, v8, v8 ; CHECK-NEXT: vmfeq.vv v7, v24, v24 ; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vfmax.vv v16, v8, v16 -; CHECK-NEXT: vmfeq.vv v0, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: 
vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0 +; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vmfeq.vv v7, v16, v16 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v24, v24, v8 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vfmax.vv v8, v16, v8 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; CHECK-NEXT: beqz a0, .LBB149_2 @@ -2791,17 +2765,17 @@ define float @vreduce_fmaximum_v128f32_nonans(ptr %x) { ; CHECK-LABEL: vreduce_fmaximum_v128f32_nonans: ; CHECK: # %bb.0: ; CHECK-NEXT: li a1, 32 -; CHECK-NEXT: addi a2, a0, 384 +; CHECK-NEXT: addi a2, a0, 256 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a2) -; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: vle32.v v16, (a0) ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vle32.v v0, (a1) -; CHECK-NEXT: vfmax.vv v8, v24, v8 -; CHECK-NEXT: vfmax.vv v16, v16, v0 +; CHECK-NEXT: vle32.v v24, (a1) +; CHECK-NEXT: vle32.v v0, (a0) +; CHECK-NEXT: vfmax.vv v24, v0, v24 ; CHECK-NEXT: vfmax.vv v8, v16, v8 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret @@ -2959,42 +2933,25 @@ declare double @llvm.vector.reduce.fmaximum.v32f64(<32 x double>) define double @vreduce_fmaximum_v32f64(ptr %x) { ; CHECK-LABEL: vreduce_fmaximum_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m a0, v16 ; CHECK-NEXT: beqz a0, .LBB159_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: lui a0, %hi(.LCPI159_0) ; CHECK-NEXT: fld fa0, %lo(.LCPI159_0)(a0) -; CHECK-NEXT: j .LBB159_3 +; CHECK-NEXT: ret ; CHECK-NEXT: .LBB159_2: ; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 -; CHECK-NEXT: .LBB159_3: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = load <32 x double>, ptr %x %red = call double @llvm.vector.reduce.fmaximum.v32f64(<32 x double> %v) @@ -3028,59 +2985,56 @@ 
define double @vreduce_fmaximum_v64f64(ptr %x) { ; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: addi a1, a0, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 384 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vle64.v v24, (a1) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: vmfeq.vv v7, v16, v16 -; CHECK-NEXT: vmerge.vvm v24, v8, v16, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v16, v16, v8, v0 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vle64.v v8, (a1) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v16, v8 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v16, v16, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vmfeq.vv v0, v24, v24 +; CHECK-NEXT: vmfeq.vv v7, v8, v8 +; CHECK-NEXT: vmerge.vvm v16, v24, v8, v0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: vfmax.vv v16, v8, v16 -; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmfeq.vv v7, v16, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vfmax.vv v24, v24, v8 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: vmerge.vvm v8, v24, v16, v0 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 +; CHECK-NEXT: vfmax.vv v8, v16, v8 ; CHECK-NEXT: vmfne.vv v16, v8, v8 ; CHECK-NEXT: vcpop.m 
a0, v16 ; CHECK-NEXT: beqz a0, .LBB161_2 @@ -3109,15 +3063,15 @@ define double @vreduce_fmaximum_v64f64_nonans(ptr %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: addi a1, a0, 256 +; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: addi a1, a0, 384 ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: vle64.v v0, (a1) -; CHECK-NEXT: vfmax.vv v16, v24, v16 -; CHECK-NEXT: vfmax.vv v8, v8, v0 +; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: vle64.v v0, (a0) +; CHECK-NEXT: vfmax.vv v24, v0, v24 ; CHECK-NEXT: vfmax.vv v8, v8, v16 +; CHECK-NEXT: vfmax.vv v8, v8, v24 ; CHECK-NEXT: vfredmax.vs v8, v8, v8 ; CHECK-NEXT: vfmv.f.s fa0, v8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll index f920e39e7d295..8f61f314cf71b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int-vp.ll @@ -801,9 +801,9 @@ declare i32 @llvm.vp.reduce.xor.v64i32(i32, <64 x i32>, <64 x i1>, i32) define signext i32 @vpreduce_xor_v64i32(i32 signext %s, <64 x i32> %v, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vpreduce_xor_v64i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 +; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB49_2 ; CHECK-NEXT: # %bb.1: @@ -1575,10 +1575,10 @@ define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i3 ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vid.v v10 ; RV32-NEXT: vmsltu.vx v9, v10, a1 -; RV32-NEXT: vmand.mm v0, v9, v0 ; RV32-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV32-NEXT: vmv.v.i v9, 1 -; RV32-NEXT: vmerge.vvm v8, v9, v8, v0 +; RV32-NEXT: vmv.v.i v10, 1 +; RV32-NEXT: vmand.mm v0, v9, v0 +; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV32-NEXT: vslidedown.vi v9, v8, 4 ; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: vslidedown.vi v9, v8, 2 @@ -1606,10 +1606,10 @@ define signext i8 @vpreduce_mul_v8i8(i8 signext %s, <8 x i8> %v, <8 x i1> %m, i3 ; RV64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64-NEXT: vid.v v10 ; RV64-NEXT: vmsltu.vx v9, v10, a1 -; RV64-NEXT: vmand.mm v0, v9, v0 ; RV64-NEXT: vsetvli zero, zero, e8, mf2, ta, ma -; RV64-NEXT: vmv.v.i v9, 1 -; RV64-NEXT: vmerge.vvm v8, v9, v8, v0 +; RV64-NEXT: vmv.v.i v10, 1 +; RV64-NEXT: vmand.mm v0, v9, v0 +; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV64-NEXT: vslidedown.vi v9, v8, 4 ; RV64-NEXT: vmul.vv v8, v8, v9 ; RV64-NEXT: vslidedown.vi v9, v8, 2 @@ -1643,10 +1643,10 @@ define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m, ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV32-NEXT: vid.v v12 ; RV32-NEXT: vmsltu.vx v9, v12, a1 -; RV32-NEXT: vmand.mm v0, v9, v0 ; RV32-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV32-NEXT: vmv.v.i v9, 1 -; RV32-NEXT: vmerge.vvm v8, v9, v8, v0 +; RV32-NEXT: vmv.v.i v10, 1 +; RV32-NEXT: vmand.mm v0, v9, v0 +; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV32-NEXT: vslidedown.vi v9, v8, 8 ; RV32-NEXT: vmul.vv v8, v8, v9 ; RV32-NEXT: vslidedown.vi v9, v8, 4 @@ -1676,10 +1676,10 @@ define signext i8 @vpreduce_mul_v16i8(i8 signext %s, <16 x i8> %v, <16 x i1> %m, ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vid.v v12 ; RV64-NEXT: vmsltu.vx v9, v12, a1 -; RV64-NEXT: vmand.mm v0, v9, v0 
; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, ma -; RV64-NEXT: vmv.v.i v9, 1 -; RV64-NEXT: vmerge.vvm v8, v9, v8, v0 +; RV64-NEXT: vmv.v.i v10, 1 +; RV64-NEXT: vmand.mm v0, v9, v0 +; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV64-NEXT: vslidedown.vi v9, v8, 8 ; RV64-NEXT: vmul.vv v8, v8, v9 ; RV64-NEXT: vslidedown.vi v9, v8, 4 @@ -1716,10 +1716,10 @@ define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m, ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV32-NEXT: vid.v v16 ; RV32-NEXT: vmsltu.vx v10, v16, a1 -; RV32-NEXT: vmand.mm v0, v10, v0 ; RV32-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; RV32-NEXT: vmv.v.i v10, 1 -; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 +; RV32-NEXT: vmv.v.i v12, 1 +; RV32-NEXT: vmand.mm v0, v10, v0 +; RV32-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV32-NEXT: vslidedown.vi v10, v8, 16 ; RV32-NEXT: vmul.vv v8, v8, v10 ; RV32-NEXT: vslidedown.vi v10, v8, 8 @@ -1752,10 +1752,10 @@ define signext i8 @vpreduce_mul_v32i8(i8 signext %s, <32 x i8> %v, <32 x i1> %m, ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; RV64-NEXT: vid.v v16 ; RV64-NEXT: vmsltu.vx v10, v16, a1 -; RV64-NEXT: vmand.mm v0, v10, v0 ; RV64-NEXT: vsetvli zero, zero, e8, m2, ta, ma -; RV64-NEXT: vmv.v.i v10, 1 -; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 +; RV64-NEXT: vmv.v.i v12, 1 +; RV64-NEXT: vmand.mm v0, v10, v0 +; RV64-NEXT: vmerge.vvm v8, v12, v8, v0 ; RV64-NEXT: vslidedown.vi v10, v8, 16 ; RV64-NEXT: vmul.vv v8, v8, v10 ; RV64-NEXT: vslidedown.vi v10, v8, 8 @@ -1794,18 +1794,19 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV32-NEXT: lui a3, %hi(.LCPI72_0) ; RV32-NEXT: addi a3, a3, %lo(.LCPI72_0) ; RV32-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV32-NEXT: vle8.v v12, (a3) ; RV32-NEXT: vid.v v16 -; RV32-NEXT: vmsltu.vx v14, v16, a1 -; RV32-NEXT: li a3, 64 -; RV32-NEXT: vsext.vf4 v16, v12 ; RV32-NEXT: vmsltu.vx v12, v16, a1 +; RV32-NEXT: vle8.v v14, (a3) +; RV32-NEXT: li a3, 64 +; RV32-NEXT: vsext.vf4 v16, v14 +; RV32-NEXT: vmsltu.vx v13, v16, a1 +; RV32-NEXT: vsetvli zero, a3, e8, m4, ta, ma +; RV32-NEXT: vmv.v.i v16, 1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vslideup.vi v14, v12, 4 +; RV32-NEXT: vslideup.vi v12, v13, 4 ; RV32-NEXT: vsetvli zero, a3, e8, m4, ta, ma -; RV32-NEXT: vmand.mm v0, v14, v0 -; RV32-NEXT: vmv.v.i v12, 1 -; RV32-NEXT: vmerge.vvm v8, v12, v8, v0 +; RV32-NEXT: vmand.mm v0, v12, v0 +; RV32-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV32-NEXT: vslidedown.vx v12, v8, a0 ; RV32-NEXT: vmul.vv v8, v8, v12 ; RV32-NEXT: vslidedown.vi v12, v8, 16 @@ -1840,18 +1841,19 @@ define signext i8 @vpreduce_mul_v64i8(i8 signext %s, <64 x i8> %v, <64 x i1> %m, ; RV64-NEXT: lui a3, %hi(.LCPI72_0) ; RV64-NEXT: addi a3, a3, %lo(.LCPI72_0) ; RV64-NEXT: vsetvli zero, a0, e32, m8, ta, ma -; RV64-NEXT: vle8.v v12, (a3) ; RV64-NEXT: vid.v v16 -; RV64-NEXT: vmsltu.vx v14, v16, a1 -; RV64-NEXT: li a3, 64 -; RV64-NEXT: vsext.vf4 v16, v12 ; RV64-NEXT: vmsltu.vx v12, v16, a1 +; RV64-NEXT: vle8.v v14, (a3) +; RV64-NEXT: li a3, 64 +; RV64-NEXT: vsext.vf4 v16, v14 +; RV64-NEXT: vmsltu.vx v13, v16, a1 +; RV64-NEXT: vsetvli zero, a3, e8, m4, ta, ma +; RV64-NEXT: vmv.v.i v16, 1 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vslideup.vi v14, v12, 4 +; RV64-NEXT: vslideup.vi v12, v13, 4 ; RV64-NEXT: vsetvli zero, a3, e8, m4, ta, ma -; RV64-NEXT: vmand.mm v0, v14, v0 -; RV64-NEXT: vmv.v.i v12, 1 -; RV64-NEXT: vmerge.vvm v8, v12, v8, v0 +; RV64-NEXT: vmand.mm v0, v12, v0 +; RV64-NEXT: vmerge.vvm v8, v16, v8, v0 ; RV64-NEXT: vslidedown.vx v12, 
v8, a0 ; RV64-NEXT: vmul.vv v8, v8, v12 ; RV64-NEXT: vslidedown.vi v12, v8, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll index 707d1202aca0f..c3c657c96c92a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-reduction-int.ll @@ -1471,14 +1471,14 @@ declare i64 @llvm.vector.reduce.add.v64i64(<64 x i64>) define i64 @vreduce_add_v64i64(ptr %x) nounwind { ; RV32-LABEL: vreduce_add_v64i64: ; RV32: # %bb.0: -; RV32-NEXT: addi a1, a0, 384 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v24, (a1) -; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: vle64.v v0, (a1) ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a0, a0, 256 -; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 384 +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: vadd.vv v24, v0, v24 ; RV32-NEXT: vmv.s.x v7, zero ; RV32-NEXT: li a1, 32 @@ -1495,15 +1495,15 @@ define i64 @vreduce_add_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 384 ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vle64.v v0, (a1) -; RV64-NEXT: vadd.vv v16, v24, v16 -; RV64-NEXT: vadd.vv v8, v8, v0 +; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v0, (a0) +; RV64-NEXT: vadd.vv v24, v0, v24 ; RV64-NEXT: vadd.vv v8, v8, v16 +; RV64-NEXT: vadd.vv v8, v8, v24 ; RV64-NEXT: vmv.s.x v16, zero ; RV64-NEXT: vredsum.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 @@ -1519,18 +1519,18 @@ define i64 @vwreduce_add_v64i64(ptr %x) { ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vle32.v v16, (a0) -; RV32-NEXT: vle32.v v8, (a1) +; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: vle32.v v24, (a0) ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v0, v16, 16 +; RV32-NEXT: vslidedown.vi v0, v24, 16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vwadd.vv v24, v16, v8 +; RV32-NEXT: vwadd.vv v8, v24, v16 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 16 +; RV32-NEXT: vslidedown.vi v16, v16, 16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vwadd.vv v16, v0, v8 +; RV32-NEXT: vwadd.vv v24, v0, v16 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v24, v16 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vmv.s.x v16, zero ; RV32-NEXT: vredsum.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 @@ -1550,15 +1550,15 @@ define i64 @vwreduce_add_v64i64(ptr %x) { ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: li a2, 32 ; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vle32.v v16, (a1) +; RV64-NEXT: vle32.v v8, (a1) +; RV64-NEXT: vle32.v v16, (a0) ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v8, 16 +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV64-NEXT: vslidedown.vi v0, v16, 16 +; RV64-NEXT: vslidedown.vi v0, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vwadd.vv v24, v8, v16 +; RV64-NEXT: vwadd.vv v24, 
v16, v8 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vwadd.vv v8, v16, v0 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma @@ -1585,18 +1585,18 @@ define i64 @vwreduce_uadd_v64i64(ptr %x) { ; RV32-NEXT: addi a1, a0, 128 ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV32-NEXT: vle32.v v16, (a0) -; RV32-NEXT: vle32.v v8, (a1) +; RV32-NEXT: vle32.v v16, (a1) +; RV32-NEXT: vle32.v v24, (a0) ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v0, v16, 16 +; RV32-NEXT: vslidedown.vi v0, v24, 16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vwaddu.vv v24, v16, v8 +; RV32-NEXT: vwaddu.vv v8, v24, v16 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 16 +; RV32-NEXT: vslidedown.vi v16, v16, 16 ; RV32-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV32-NEXT: vwaddu.vv v16, v0, v8 +; RV32-NEXT: vwaddu.vv v24, v0, v16 ; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; RV32-NEXT: vadd.vv v8, v24, v16 +; RV32-NEXT: vadd.vv v8, v8, v24 ; RV32-NEXT: vmv.s.x v16, zero ; RV32-NEXT: vredsum.vs v8, v8, v16 ; RV32-NEXT: vmv.x.s a0, v8 @@ -1616,15 +1616,15 @@ define i64 @vwreduce_uadd_v64i64(ptr %x) { ; RV64-NEXT: addi a1, a0, 128 ; RV64-NEXT: li a2, 32 ; RV64-NEXT: vsetvli zero, a2, e32, m8, ta, ma -; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: vle32.v v16, (a1) +; RV64-NEXT: vle32.v v8, (a1) +; RV64-NEXT: vle32.v v16, (a0) ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v24, v8, 16 +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: addi a0, sp, 16 ; RV64-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; RV64-NEXT: vslidedown.vi v0, v16, 16 +; RV64-NEXT: vslidedown.vi v0, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; RV64-NEXT: vwaddu.vv v24, v8, v16 +; RV64-NEXT: vwaddu.vv v24, v16, v8 ; RV64-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; RV64-NEXT: vwaddu.vv v8, v16, v0 ; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma @@ -2201,16 +2201,16 @@ define i64 @vreduce_and_v64i64(ptr %x) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a1, a0, 384 -; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 384 ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vand.vv v16, v0, v16 -; RV32-NEXT: vand.vv v8, v8, v24 +; RV32-NEXT: vand.vv v24, v0, v24 ; RV32-NEXT: vand.vv v8, v8, v16 +; RV32-NEXT: vand.vv v8, v8, v24 ; RV32-NEXT: vredand.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2222,15 +2222,15 @@ define i64 @vreduce_and_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 384 ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vle64.v v0, (a1) -; RV64-NEXT: vand.vv v16, v24, v16 -; RV64-NEXT: vand.vv v8, v8, v0 +; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v0, (a0) +; RV64-NEXT: vand.vv v24, v0, v24 ; RV64-NEXT: vand.vv v8, v8, v16 +; RV64-NEXT: vand.vv v8, v8, v24 ; RV64-NEXT: vredand.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -2793,16 +2793,16 @@ define i64 @vreduce_or_v64i64(ptr %x) 
nounwind { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a1, a0, 384 -; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 384 ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vor.vv v16, v0, v16 -; RV32-NEXT: vor.vv v8, v8, v24 +; RV32-NEXT: vor.vv v24, v0, v24 ; RV32-NEXT: vor.vv v8, v8, v16 +; RV32-NEXT: vor.vv v8, v8, v24 ; RV32-NEXT: vredor.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -2814,15 +2814,15 @@ define i64 @vreduce_or_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 384 ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vle64.v v0, (a1) -; RV64-NEXT: vor.vv v16, v24, v16 -; RV64-NEXT: vor.vv v8, v8, v0 +; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v0, (a0) +; RV64-NEXT: vor.vv v24, v0, v24 ; RV64-NEXT: vor.vv v8, v8, v16 +; RV64-NEXT: vor.vv v8, v8, v24 ; RV64-NEXT: vredor.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -3414,14 +3414,14 @@ declare i64 @llvm.vector.reduce.xor.v64i64(<64 x i64>) define i64 @vreduce_xor_v64i64(ptr %x) nounwind { ; RV32-LABEL: vreduce_xor_v64i64: ; RV32: # %bb.0: -; RV32-NEXT: addi a1, a0, 384 ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV32-NEXT: vle64.v v24, (a1) -; RV32-NEXT: addi a1, a0, 128 -; RV32-NEXT: vle64.v v0, (a1) ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a0, a0, 256 -; RV32-NEXT: vle64.v v16, (a0) +; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 384 +; RV32-NEXT: addi a0, a0, 128 +; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: vxor.vv v24, v0, v24 ; RV32-NEXT: vmv.s.x v7, zero ; RV32-NEXT: li a1, 32 @@ -3438,15 +3438,15 @@ define i64 @vreduce_xor_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 384 ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vle64.v v0, (a1) -; RV64-NEXT: vxor.vv v16, v24, v16 -; RV64-NEXT: vxor.vv v8, v8, v0 +; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v0, (a0) +; RV64-NEXT: vxor.vv v24, v0, v24 ; RV64-NEXT: vxor.vv v8, v8, v16 +; RV64-NEXT: vxor.vv v8, v8, v24 ; RV64-NEXT: vmv.s.x v16, zero ; RV64-NEXT: vredxor.vs v8, v8, v16 ; RV64-NEXT: vmv.x.s a0, v8 @@ -4011,16 +4011,16 @@ define i64 @vreduce_smin_v64i64(ptr %x) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a1, a0, 384 -; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 384 ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vmin.vv v16, v0, v16 -; RV32-NEXT: vmin.vv v8, v8, v24 +; RV32-NEXT: vmin.vv v24, v0, v24 ; RV32-NEXT: vmin.vv v8, v8, v16 +; RV32-NEXT: vmin.vv v8, v8, v24 ; RV32-NEXT: vredmin.vs v8, v8, v8 
; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -4032,15 +4032,15 @@ define i64 @vreduce_smin_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 384 ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vle64.v v0, (a1) -; RV64-NEXT: vmin.vv v16, v24, v16 -; RV64-NEXT: vmin.vv v8, v8, v0 +; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v0, (a0) +; RV64-NEXT: vmin.vv v24, v0, v24 ; RV64-NEXT: vmin.vv v8, v8, v16 +; RV64-NEXT: vmin.vv v8, v8, v24 ; RV64-NEXT: vredmin.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -4604,16 +4604,16 @@ define i64 @vreduce_smax_v64i64(ptr %x) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a1, a0, 384 -; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 384 ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vmax.vv v16, v0, v16 -; RV32-NEXT: vmax.vv v8, v8, v24 +; RV32-NEXT: vmax.vv v24, v0, v24 ; RV32-NEXT: vmax.vv v8, v8, v16 +; RV32-NEXT: vmax.vv v8, v8, v24 ; RV32-NEXT: vredmax.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -4625,15 +4625,15 @@ define i64 @vreduce_smax_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 384 ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vle64.v v0, (a1) -; RV64-NEXT: vmax.vv v16, v24, v16 -; RV64-NEXT: vmax.vv v8, v8, v0 +; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v0, (a0) +; RV64-NEXT: vmax.vv v24, v0, v24 ; RV64-NEXT: vmax.vv v8, v8, v16 +; RV64-NEXT: vmax.vv v8, v8, v24 ; RV64-NEXT: vredmax.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -5197,16 +5197,16 @@ define i64 @vreduce_umin_v64i64(ptr %x) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a1, a0, 384 -; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 384 ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vminu.vv v16, v0, v16 -; RV32-NEXT: vminu.vv v8, v8, v24 +; RV32-NEXT: vminu.vv v24, v0, v24 ; RV32-NEXT: vminu.vv v8, v8, v16 +; RV32-NEXT: vminu.vv v8, v8, v24 ; RV32-NEXT: vredminu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5218,15 +5218,15 @@ define i64 @vreduce_umin_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 384 ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vle64.v v0, (a1) -; RV64-NEXT: vminu.vv v16, v24, v16 -; RV64-NEXT: vminu.vv v8, v8, v0 +; 
RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v0, (a0) +; RV64-NEXT: vminu.vv v24, v0, v24 ; RV64-NEXT: vminu.vv v8, v8, v16 +; RV64-NEXT: vminu.vv v8, v8, v24 ; RV64-NEXT: vredminu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -5789,16 +5789,16 @@ define i64 @vreduce_umax_v64i64(ptr %x) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a1, a0, 384 -; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 384 ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vle64.v v0, (a0) ; RV32-NEXT: li a1, 32 -; RV32-NEXT: vmaxu.vv v16, v0, v16 -; RV32-NEXT: vmaxu.vv v8, v8, v24 +; RV32-NEXT: vmaxu.vv v24, v0, v24 ; RV32-NEXT: vmaxu.vv v8, v8, v16 +; RV32-NEXT: vmaxu.vv v8, v8, v24 ; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma @@ -5810,15 +5810,15 @@ define i64 @vreduce_umax_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 384 ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vle64.v v0, (a1) -; RV64-NEXT: vmaxu.vv v16, v24, v16 -; RV64-NEXT: vmaxu.vv v8, v8, v0 +; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v0, (a0) +; RV64-NEXT: vmaxu.vv v24, v0, v24 ; RV64-NEXT: vmaxu.vv v8, v8, v16 +; RV64-NEXT: vmaxu.vv v8, v8, v24 ; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a0, v8 ; RV64-NEXT: ret @@ -6585,15 +6585,15 @@ define i64 @vreduce_mul_v64i64(ptr %x) nounwind { ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-NEXT: vle64.v v8, (a0) -; RV32-NEXT: addi a1, a0, 384 -; RV32-NEXT: vle64.v v16, (a1) ; RV32-NEXT: addi a1, a0, 256 +; RV32-NEXT: vle64.v v16, (a1) +; RV32-NEXT: addi a1, a0, 384 ; RV32-NEXT: addi a0, a0, 128 -; RV32-NEXT: vle64.v v24, (a0) -; RV32-NEXT: vle64.v v0, (a1) -; RV32-NEXT: vmul.vv v16, v24, v16 -; RV32-NEXT: vmul.vv v8, v8, v0 +; RV32-NEXT: vle64.v v24, (a1) +; RV32-NEXT: vle64.v v0, (a0) +; RV32-NEXT: vmul.vv v24, v0, v24 ; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vmul.vv v8, v8, v24 ; RV32-NEXT: vslidedown.vi v16, v8, 8 ; RV32-NEXT: vmul.vv v8, v8, v16 ; RV32-NEXT: vslidedown.vi v16, v8, 4 @@ -6612,15 +6612,15 @@ define i64 @vreduce_mul_v64i64(ptr %x) nounwind { ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v8, (a0) -; RV64-NEXT: addi a1, a0, 384 -; RV64-NEXT: vle64.v v16, (a1) ; RV64-NEXT: addi a1, a0, 256 +; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: addi a1, a0, 384 ; RV64-NEXT: addi a0, a0, 128 -; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: vle64.v v0, (a1) -; RV64-NEXT: vmul.vv v16, v24, v16 -; RV64-NEXT: vmul.vv v8, v8, v0 +; RV64-NEXT: vle64.v v24, (a1) +; RV64-NEXT: vle64.v v0, (a0) +; RV64-NEXT: vmul.vv v24, v0, v24 ; RV64-NEXT: vmul.vv v8, v8, v16 +; RV64-NEXT: vmul.vv v8, v8, v24 ; RV64-NEXT: vslidedown.vi v16, v8, 8 ; RV64-NEXT: vmul.vv v8, v8, v16 ; RV64-NEXT: vslidedown.vi v16, v8, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll index 266772d36ee9c..70555bd6c09e2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-rint-vp.ll @@ 
-519,8 +519,8 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vmv1r.v v6, v0 -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vslidedown.vi v7, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -542,11 +542,11 @@ define <32 x double> @vp_rint_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroex ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll index a4ff079846fd8..d35637401dd66 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-round-vp.ll @@ -13,13 +13,13 @@ declare <2 x half> @llvm.vp.round.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_round_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI0_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -35,12 +35,12 @@ define <2 x half> @vp_round_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -59,12 +59,12 @@ define <2 x half> @vp_round_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) define <2 x half> @vp_round_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI1_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -77,11 +77,11 @@ define <2 x half> @vp_round_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, 
a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -99,13 +99,13 @@ declare <4 x half> @llvm.vp.round.v4f16(<4 x half>, <4 x i1>, i32) define <4 x half> @vp_round_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI2_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -121,12 +121,12 @@ define <4 x half> @vp_round_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -145,12 +145,12 @@ define <4 x half> @vp_round_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) define <4 x half> @vp_round_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI3_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,11 +163,11 @@ define <4 x half> @vp_round_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -185,13 +185,13 @@ declare <8 x half> @llvm.vp.round.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; 
ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -207,12 +207,12 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -231,12 +231,12 @@ define <8 x half> @vp_round_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) define <8 x half> @vp_round_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -249,11 +249,11 @@ define <8 x half> @vp_round_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -273,12 +273,12 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) -; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -295,12 +295,12 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -319,12 +319,12 @@ define <16 x half> @vp_round_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroext % define <16 x half> @vp_round_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_v16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; 
ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -337,11 +337,11 @@ define <16 x half> @vp_round_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -363,9 +363,9 @@ define <2 x float> @vp_round_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -384,8 +384,8 @@ define <2 x float> @vp_round_v2f32_unmasked(<2 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -405,9 +405,9 @@ define <4 x float> @vp_round_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -426,8 +426,8 @@ define <4 x float> @vp_round_v4f32_unmasked(<4 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -448,9 +448,9 @@ define <8 x float> @vp_round_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -470,8 +470,8 @@ define <8 x float> @vp_round_v8f32_unmasked(<8 x float> %va, i32 zeroext %evl) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -492,9 +492,9 @@ define <16 x float> @vp_round_v16f32(<16 x float> %va, <16 x i1> %m, i32 zeroext ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; 
CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -514,8 +514,8 @@ define <16 x float> @vp_round_v16f32_unmasked(<16 x float> %va, i32 zeroext %evl ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -531,13 +531,13 @@ declare <2 x double> @llvm.vp.round.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_round_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI16_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -552,12 +552,12 @@ define <2 x double> @vp_round_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext % define <2 x double> @vp_round_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -575,12 +575,12 @@ define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext % ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -596,12 +596,12 @@ define <4 x double> @vp_round_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroext % define <4 x double> @vp_round_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -619,12 +619,12 @@ define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext % ; CHECK: # %bb.0: 
; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -640,12 +640,12 @@ define <8 x double> @vp_round_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroext % define <8 x double> @vp_round_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI21_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -663,12 +663,12 @@ define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -684,12 +684,12 @@ define <15 x double> @vp_round_v15f64(<15 x double> %va, <15 x i1> %m, i32 zeroe define <15 x double> @vp_round_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v15f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI23_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -707,12 +707,12 @@ define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -728,12 +728,12 @@ define <16 x double> @vp_round_v16f64(<16 x double> %va, <16 x i1> %m, i32 zeroe define <16 x double> @vp_round_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_v16f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, 
fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI25_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -762,8 +762,8 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -778,33 +778,33 @@ define <32 x double> @vp_round_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroe ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsrmi a1, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll index c28d5fb1a8193..addb76b0bea7a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundeven-vp.ll @@ -13,13 +13,13 @@ declare <2 x half> @llvm.vp.roundeven.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_roundeven_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI0_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -35,12 +35,12 @@ define <2 x half> @vp_roundeven_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; 
ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -59,12 +59,12 @@ define <2 x half> @vp_roundeven_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext % define <2 x half> @vp_roundeven_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI1_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -77,11 +77,11 @@ define <2 x half> @vp_roundeven_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -99,13 +99,13 @@ declare <4 x half> @llvm.vp.roundeven.v4f16(<4 x half>, <4 x i1>, i32) define <4 x half> @vp_roundeven_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI2_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -121,12 +121,12 @@ define <4 x half> @vp_roundeven_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -145,12 +145,12 @@ define <4 x half> @vp_roundeven_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext % define <4 x half> @vp_roundeven_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI3_0) +; ZVFH-NEXT: flh 
fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,11 +163,11 @@ define <4 x half> @vp_roundeven_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -185,13 +185,13 @@ declare <8 x half> @llvm.vp.roundeven.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -207,12 +207,12 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -231,12 +231,12 @@ define <8 x half> @vp_roundeven_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext % define <8 x half> @vp_roundeven_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -249,11 +249,11 @@ define <8 x half> @vp_roundeven_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -273,12 +273,12 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 +; 
ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) -; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -295,12 +295,12 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -319,12 +319,12 @@ define <16 x half> @vp_roundeven_v16f16(<16 x half> %va, <16 x i1> %m, i32 zeroe define <16 x half> @vp_roundeven_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_v16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -337,11 +337,11 @@ define <16 x half> @vp_roundeven_v16f16_unmasked(<16 x half> %va, i32 zeroext %e ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -363,9 +363,9 @@ define <2 x float> @vp_roundeven_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroext ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -384,8 +384,8 @@ define <2 x float> @vp_roundeven_v2f32_unmasked(<2 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -405,9 +405,9 @@ define <4 x float> @vp_roundeven_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroext ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v 
v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -426,8 +426,8 @@ define <4 x float> @vp_roundeven_v4f32_unmasked(<4 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -448,9 +448,9 @@ define <8 x float> @vp_roundeven_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -470,8 +470,8 @@ define <8 x float> @vp_roundeven_v8f32_unmasked(<8 x float> %va, i32 zeroext %ev ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -492,9 +492,9 @@ define <16 x float> @vp_roundeven_v16f32(<16 x float> %va, <16 x i1> %m, i32 zer ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -514,8 +514,8 @@ define <16 x float> @vp_roundeven_v16f32_unmasked(<16 x float> %va, i32 zeroext ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -531,13 +531,13 @@ declare <2 x double> @llvm.vp.roundeven.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_roundeven_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI16_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -552,12 +552,12 @@ define <2 x double> @vp_roundeven_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroe define <2 x double> @vp_roundeven_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, 
v9, v0.t @@ -575,12 +575,12 @@ define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -596,12 +596,12 @@ define <4 x double> @vp_roundeven_v4f64(<4 x double> %va, <4 x i1> %m, i32 zeroe define <4 x double> @vp_roundeven_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -619,12 +619,12 @@ define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroe ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -640,12 +640,12 @@ define <8 x double> @vp_roundeven_v8f64(<8 x double> %va, <8 x i1> %m, i32 zeroe define <8 x double> @vp_roundeven_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI21_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -663,12 +663,12 @@ define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 z ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -684,12 +684,12 @@ define <15 x double> @vp_roundeven_v15f64(<15 x double> %va, <15 x i1> %m, i32 z define <15 x double> @vp_roundeven_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v15f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; 
CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI23_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -707,12 +707,12 @@ define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 z ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -728,12 +728,12 @@ define <16 x double> @vp_roundeven_v16f64(<16 x double> %va, <16 x i1> %m, i32 z define <16 x double> @vp_roundeven_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_v16f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI25_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -762,8 +762,8 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -778,33 +778,33 @@ define <32 x double> @vp_roundeven_v32f64(<32 x double> %va, <32 x i1> %m, i32 z ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: fsrmi a1, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 0 ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsrmi a1, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, 
v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll index 64d3664a4c372..bac25bcfec01d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-roundtozero-vp.ll @@ -13,13 +13,13 @@ declare <2 x half> @llvm.vp.roundtozero.v2f16(<2 x half>, <2 x i1>, i32) define <2 x half> @vp_roundtozero_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI0_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI0_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -35,12 +35,12 @@ define <2 x half> @vp_roundtozero_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -59,12 +59,12 @@ define <2 x half> @vp_roundtozero_v2f16(<2 x half> %va, <2 x i1> %m, i32 zeroext define <2 x half> @vp_roundtozero_v2f16_unmasked(<2 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI1_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI1_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -77,11 +77,11 @@ define <2 x half> @vp_roundtozero_v2f16_unmasked(<2 x half> %va, i32 zeroext %ev ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -99,13 +99,13 @@ declare <4 x half> @llvm.vp.roundtozero.v4f16(<4 x half>, <4 x i1>, i32) define <4 x half> @vp_roundtozero_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI2_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; 
ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI2_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -121,12 +121,12 @@ define <4 x half> @vp_roundtozero_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v11, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v11, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vmv.v.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -145,12 +145,12 @@ define <4 x half> @vp_roundtozero_v4f16(<4 x half> %va, <4 x i1> %m, i32 zeroext define <4 x half> @vp_roundtozero_v4f16_unmasked(<4 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI3_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI3_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -163,11 +163,11 @@ define <4 x half> @vp_roundtozero_v4f16_unmasked(<4 x half> %va, i32 zeroext %ev ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -185,13 +185,13 @@ declare <8 x half> @llvm.vp.roundtozero.v8f16(<8 x half>, <8 x i1>, i32) define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI4_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -207,12 +207,12 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v12, v10, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v12, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, 
e32, m2, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -231,12 +231,12 @@ define <8 x half> @vp_roundtozero_v8f16(<8 x half> %va, <8 x i1> %m, i32 zeroext define <8 x half> @vp_roundtozero_v8f16_unmasked(<8 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI5_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -249,11 +249,11 @@ define <8 x half> @vp_roundtozero_v8f16_unmasked(<8 x half> %va, i32 zeroext %ev ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -273,12 +273,12 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vmv1r.v v10, v0 +; ZVFH-NEXT: vfabs.v v12, v8, v0.t ; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) -; ZVFH-NEXT: vfabs.v v12, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vmflt.vf v10, v12, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vmv1r.v v0, v10 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -295,12 +295,12 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v12, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -319,12 +319,12 @@ define <16 x half> @vp_roundtozero_v16f16(<16 x half> %va, <16 x i1> %m, i32 zer define <16 x half> @vp_roundtozero_v16f16_unmasked(<16 x half> %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_v16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -337,11 +337,11 @@ define <16 x half> @vp_roundtozero_v16f16_unmasked(<16 x half> %va, i32 zeroext ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, 
m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -363,9 +363,9 @@ define <2 x float> @vp_roundtozero_v2f32(<2 x float> %va, <2 x i1> %m, i32 zeroe ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -384,8 +384,8 @@ define <2 x float> @vp_roundtozero_v2f32_unmasked(<2 x float> %va, i32 zeroext % ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -405,9 +405,9 @@ define <4 x float> @vp_roundtozero_v4f32(<4 x float> %va, <4 x i1> %m, i32 zeroe ; CHECK-NEXT: vfabs.v v9, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -426,8 +426,8 @@ define <4 x float> @vp_roundtozero_v4f32_unmasked(<4 x float> %va, i32 zeroext % ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -448,9 +448,9 @@ define <8 x float> @vp_roundtozero_v8f32(<8 x float> %va, <8 x i1> %m, i32 zeroe ; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -470,8 +470,8 @@ define <8 x float> @vp_roundtozero_v8f32_unmasked(<8 x float> %va, i32 zeroext % ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -492,9 +492,9 @@ define <16 x float> @vp_roundtozero_v16f32(<16 x float> %va, <16 x i1> %m, i32 z ; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -514,8 +514,8 @@ define <16 x float> @vp_roundtozero_v16f32_unmasked(<16 x float> %va, i32 zeroex ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: 
vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -531,13 +531,13 @@ declare <2 x double> @llvm.vp.roundtozero.v2f64(<2 x double>, <2 x i1>, i32) define <2 x double> @vp_roundtozero_v2f64(<2 x double> %va, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI16_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI16_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI16_0)(a0) +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -552,12 +552,12 @@ define <2 x double> @vp_roundtozero_v2f64(<2 x double> %va, <2 x i1> %m, i32 zer define <2 x double> @vp_roundtozero_v2f64_unmasked(<2 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -575,12 +575,12 @@ define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zer ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI18_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -596,12 +596,12 @@ define <4 x double> @vp_roundtozero_v4f64(<4 x double> %va, <4 x i1> %m, i32 zer define <4 x double> @vp_roundtozero_v4f64_unmasked(<4 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -619,12 +619,12 @@ define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x i1> %m, i32 zer ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI20_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -640,12 +640,12 @@ define <8 x double> @vp_roundtozero_v8f64(<8 x double> %va, <8 x 
i1> %m, i32 zer define <8 x double> @vp_roundtozero_v8f64_unmasked(<8 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI21_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI21_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI21_0)(a0) ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -663,12 +663,12 @@ define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI22_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI22_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -684,12 +684,12 @@ define <15 x double> @vp_roundtozero_v15f64(<15 x double> %va, <15 x i1> %m, i32 define <15 x double> @vp_roundtozero_v15f64_unmasked(<15 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v15f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI23_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI23_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI23_0)(a0) ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -707,12 +707,12 @@ define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI24_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI24_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -728,12 +728,12 @@ define <16 x double> @vp_roundtozero_v16f64(<16 x double> %va, <16 x i1> %m, i32 define <16 x double> @vp_roundtozero_v16f64_unmasked(<16 x double> %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_v16f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI25_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI25_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI25_0)(a0) ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -762,8 +762,8 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: 
li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: @@ -778,33 +778,33 @@ define <32 x double> @vp_roundtozero_v32f64(<32 x double> %va, <32 x i1> %m, i32 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: and a0, a0, a1 +; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v25, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsrmi a1, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a2, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v24, v8, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t -; CHECK-NEXT: fsrm a0 +; CHECK-NEXT: fsrm a1 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll index 318f38839851c..034a969fc2847 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-select-addsub.ll @@ -141,36 +141,18 @@ define <32 x i32> @select_addsub_v32i32(<32 x i1> %cc, <32 x i32> %a, <32 x i32> define <64 x i32> @select_addsub_v64i32(<64 x i1> %cc, <64 x i32> %a, <64 x i32> %b) { ; CHECK-LABEL: select_addsub_v64i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vle32.v v24, (a0) +; CHECK-NEXT: vrsub.vi v24, v24, 0, v0.t ; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vadd.vv v8, v8, v24 ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: vrsub.vi v8, v8, 0, v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, mu -; CHECK-NEXT: vadd.vv v8, v16, v8 ; CHECK-NEXT: vrsub.vi v24, v24, 0, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vv v16, v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; 
CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %sub = sub <64 x i32> %a, %b %add = add <64 x i32> %a, %b diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll index 03d5762b4903e..13242fc8f0d66 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-fp-vp.ll @@ -1073,19 +1073,19 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 ; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; ZVFH-NEXT: addi a1, a0, 128 ; ZVFH-NEXT: li a3, 64 +; ZVFH-NEXT: vsetivli zero, 8, e8, m1, ta, ma +; ZVFH-NEXT: vslidedown.vi v24, v0, 8 ; ZVFH-NEXT: vsetvli zero, a3, e16, m8, ta, ma ; ZVFH-NEXT: vle16.v v16, (a1) ; ZVFH-NEXT: addi a1, sp, 16 ; ZVFH-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; ZVFH-NEXT: mv a1, a2 ; ZVFH-NEXT: vle16.v v16, (a0) -; ZVFH-NEXT: mv a0, a2 -; ZVFH-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; ZVFH-NEXT: vslidedown.vi v24, v0, 8 ; ZVFH-NEXT: bltu a2, a3, .LBB43_2 ; ZVFH-NEXT: # %bb.1: -; ZVFH-NEXT: li a0, 64 +; ZVFH-NEXT: li a1, 64 ; ZVFH-NEXT: .LBB43_2: -; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma +; ZVFH-NEXT: vsetvli zero, a1, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v7, v8, v16, v0.t ; ZVFH-NEXT: addi a0, a2, -64 ; ZVFH-NEXT: sltu a1, a2, a0 @@ -1114,20 +1114,32 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 ; ; ZVFHMIN32-LABEL: fcmp_oeq_vv_v128f16: ; ZVFHMIN32: # %bb.0: -; ZVFHMIN32-NEXT: addi sp, sp, -896 -; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 896 -; ZVFHMIN32-NEXT: sw ra, 892(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: sw s0, 888(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: sw s2, 884(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: sw s3, 880(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: sw s4, 876(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: sw s5, 872(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: sw s6, 868(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: sw s7, 864(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: sw s8, 860(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: sw s9, 856(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: sw s10, 852(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: sw s11, 848(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: addi sp, sp, -1024 +; ZVFHMIN32-NEXT: .cfi_def_cfa_offset 1024 +; ZVFHMIN32-NEXT: sw ra, 1020(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: sw s0, 1016(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: sw s2, 1012(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: sw s3, 1008(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: sw s4, 1004(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: sw s5, 1000(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: sw s6, 996(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: sw s7, 992(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: sw s8, 988(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: sw s9, 984(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: sw s10, 980(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: sw s11, 976(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: fsd fs0, 968(sp) # 8-byte Folded Spill +; ZVFHMIN32-NEXT: fsd fs1, 960(sp) # 8-byte Folded Spill +; ZVFHMIN32-NEXT: fsd fs2, 952(sp) # 8-byte Folded Spill +; ZVFHMIN32-NEXT: fsd fs3, 944(sp) # 8-byte Folded Spill +; ZVFHMIN32-NEXT: fsd fs4, 936(sp) # 8-byte Folded Spill +; ZVFHMIN32-NEXT: fsd fs5, 928(sp) # 8-byte Folded Spill +; ZVFHMIN32-NEXT: fsd fs6, 920(sp) # 8-byte Folded Spill +; 
ZVFHMIN32-NEXT: fsd fs7, 912(sp) # 8-byte Folded Spill +; ZVFHMIN32-NEXT: fsd fs8, 904(sp) # 8-byte Folded Spill +; ZVFHMIN32-NEXT: fsd fs9, 896(sp) # 8-byte Folded Spill +; ZVFHMIN32-NEXT: fsd fs10, 888(sp) # 8-byte Folded Spill +; ZVFHMIN32-NEXT: fsd fs11, 880(sp) # 8-byte Folded Spill ; ZVFHMIN32-NEXT: .cfi_offset ra, -4 ; ZVFHMIN32-NEXT: .cfi_offset s0, -8 ; ZVFHMIN32-NEXT: .cfi_offset s2, -12 @@ -1140,1096 +1152,1175 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 ; ZVFHMIN32-NEXT: .cfi_offset s9, -40 ; ZVFHMIN32-NEXT: .cfi_offset s10, -44 ; ZVFHMIN32-NEXT: .cfi_offset s11, -48 -; ZVFHMIN32-NEXT: addi s0, sp, 896 +; ZVFHMIN32-NEXT: .cfi_offset fs0, -56 +; ZVFHMIN32-NEXT: .cfi_offset fs1, -64 +; ZVFHMIN32-NEXT: .cfi_offset fs2, -72 +; ZVFHMIN32-NEXT: .cfi_offset fs3, -80 +; ZVFHMIN32-NEXT: .cfi_offset fs4, -88 +; ZVFHMIN32-NEXT: .cfi_offset fs5, -96 +; ZVFHMIN32-NEXT: .cfi_offset fs6, -104 +; ZVFHMIN32-NEXT: .cfi_offset fs7, -112 +; ZVFHMIN32-NEXT: .cfi_offset fs8, -120 +; ZVFHMIN32-NEXT: .cfi_offset fs9, -128 +; ZVFHMIN32-NEXT: .cfi_offset fs10, -136 +; ZVFHMIN32-NEXT: .cfi_offset fs11, -144 +; ZVFHMIN32-NEXT: addi s0, sp, 1024 ; ZVFHMIN32-NEXT: .cfi_def_cfa s0, 0 ; ZVFHMIN32-NEXT: csrr a1, vlenb -; ZVFHMIN32-NEXT: li a2, 30 +; ZVFHMIN32-NEXT: li a2, 41 ; ZVFHMIN32-NEXT: mul a1, a1, a2 ; ZVFHMIN32-NEXT: sub sp, sp, a1 ; ZVFHMIN32-NEXT: andi sp, sp, -128 -; ZVFHMIN32-NEXT: addi a1, a0, 128 -; ZVFHMIN32-NEXT: li a2, 64 -; ZVFHMIN32-NEXT: addi a3, sp, 640 -; ZVFHMIN32-NEXT: addi a4, sp, 384 -; ZVFHMIN32-NEXT: addi a5, sp, 512 -; ZVFHMIN32-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; ZVFHMIN32-NEXT: addi a3, a0, 128 +; ZVFHMIN32-NEXT: li a1, 64 +; ZVFHMIN32-NEXT: addi a4, sp, 640 +; ZVFHMIN32-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; ZVFHMIN32-NEXT: vmv.x.s a2, v8 +; ZVFHMIN32-NEXT: vle16.v v24, (a3) +; ZVFHMIN32-NEXT: csrr a3, vlenb +; ZVFHMIN32-NEXT: slli a5, a3, 5 +; ZVFHMIN32-NEXT: add a3, a5, a3 +; ZVFHMIN32-NEXT: add a3, sp, a3 +; ZVFHMIN32-NEXT: addi a3, a3, 880 +; ZVFHMIN32-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN32-NEXT: vle16.v v0, (a0) -; ZVFHMIN32-NEXT: addi a0, sp, 256 -; ZVFHMIN32-NEXT: vle16.v v24, (a1) -; ZVFHMIN32-NEXT: vse16.v v8, (a3) -; ZVFHMIN32-NEXT: vse16.v v0, (a4) -; ZVFHMIN32-NEXT: vse16.v v16, (a5) -; ZVFHMIN32-NEXT: vse16.v v24, (a0) -; ZVFHMIN32-NEXT: lh a0, 704(sp) +; ZVFHMIN32-NEXT: vse16.v v8, (a4) +; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 7 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: slli a0, a0, 5 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 6 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: slli a3, a0, 5 +; ZVFHMIN32-NEXT: sub a0, a3, a0 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 5 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 30 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 4 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 29 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v10, (a0) # Unknown-size Folded 
Spill +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 3 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 28 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 2 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 27 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 1 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 26 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 15 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 24 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 14 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 22 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 13 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 20 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 12 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 18 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 11 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: slli a3, a0, 3 +; ZVFHMIN32-NEXT: add a0, a3, a0 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 10 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: slli a3, a0, 3 +; ZVFHMIN32-NEXT: sub a0, a3, a0 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 9 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: slli a3, a0, 2 +; ZVFHMIN32-NEXT: add a0, a3, a0 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v8, v8, 8 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: slli a3, a0, 1 +; ZVFHMIN32-NEXT: add a0, a3, a0 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: addi a0, sp, 384 +; ZVFHMIN32-NEXT: addi a3, sp, 512 +; ZVFHMIN32-NEXT: vmv.x.s a5, v16 +; ZVFHMIN32-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; ZVFHMIN32-NEXT: vse16.v v0, (a0) +; ZVFHMIN32-NEXT: vse16.v v16, (a3) +; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 7 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 11 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: 
add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 6 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 12 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 5 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 13 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 4 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 14 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 3 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: slli a3, a0, 4 +; ZVFHMIN32-NEXT: sub a0, a3, a0 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 2 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: slli a0, a0, 4 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 1 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: slli a3, a0, 4 +; ZVFHMIN32-NEXT: add a0, a3, a0 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 15 +; ZVFHMIN32-NEXT: addi a0, sp, 880 +; ZVFHMIN32-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vslidedown.vi v4, v16, 14 +; ZVFHMIN32-NEXT: vslidedown.vi v2, v16, 13 +; ZVFHMIN32-NEXT: vslidedown.vi v24, v16, 12 +; ZVFHMIN32-NEXT: vslidedown.vi v22, v16, 11 +; ZVFHMIN32-NEXT: vslidedown.vi v20, v16, 10 +; ZVFHMIN32-NEXT: vslidedown.vi v18, v16, 9 +; ZVFHMIN32-NEXT: vslidedown.vi v16, v16, 8 +; ZVFHMIN32-NEXT: vmv.x.s a6, v0 +; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN32-NEXT: vslidedown.vi v5, v0, 7 +; ZVFHMIN32-NEXT: vslidedown.vi v17, v0, 6 +; ZVFHMIN32-NEXT: vslidedown.vi v23, v0, 5 +; ZVFHMIN32-NEXT: vslidedown.vi v19, v0, 4 +; ZVFHMIN32-NEXT: vslidedown.vi v21, v0, 3 +; ZVFHMIN32-NEXT: vslidedown.vi v3, v0, 2 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v0, 1 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: slli a0, a0, 1 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZVFHMIN32-NEXT: vslidedown.vi v8, v0, 15 +; ZVFHMIN32-NEXT: vslidedown.vi v10, v0, 14 +; ZVFHMIN32-NEXT: vslidedown.vi v12, v0, 13 +; ZVFHMIN32-NEXT: vslidedown.vi v14, v0, 12 +; ZVFHMIN32-NEXT: vslidedown.vi v26, v0, 11 +; ZVFHMIN32-NEXT: vslidedown.vi v28, v0, 10 +; ZVFHMIN32-NEXT: vslidedown.vi v30, v0, 9 +; ZVFHMIN32-NEXT: vslidedown.vi v0, v0, 8 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: li a3, 24 +; ZVFHMIN32-NEXT: mul a0, a0, a3 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload +; ZVFHMIN32-NEXT: vmv.x.s a0, v6 +; ZVFHMIN32-NEXT: csrr a3, vlenb 
+; ZVFHMIN32-NEXT: li a4, 22 +; ZVFHMIN32-NEXT: mul a3, a3, a4 +; ZVFHMIN32-NEXT: add a3, sp, a3 +; ZVFHMIN32-NEXT: addi a3, a3, 880 +; ZVFHMIN32-NEXT: vl2r.v v6, (a3) # Unknown-size Folded Reload +; ZVFHMIN32-NEXT: vmv.x.s a3, v6 +; ZVFHMIN32-NEXT: csrr a4, vlenb +; ZVFHMIN32-NEXT: li a7, 20 +; ZVFHMIN32-NEXT: mul a4, a4, a7 +; ZVFHMIN32-NEXT: add a4, sp, a4 +; ZVFHMIN32-NEXT: addi a4, a4, 880 +; ZVFHMIN32-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN32-NEXT: vmv.x.s a7, v6 +; ZVFHMIN32-NEXT: csrr a4, vlenb +; ZVFHMIN32-NEXT: li t0, 18 +; ZVFHMIN32-NEXT: mul a4, a4, t0 +; ZVFHMIN32-NEXT: add a4, sp, a4 +; ZVFHMIN32-NEXT: addi a4, a4, 880 +; ZVFHMIN32-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN32-NEXT: vmv.x.s s3, v6 +; ZVFHMIN32-NEXT: csrr a4, vlenb +; ZVFHMIN32-NEXT: slli t0, a4, 3 +; ZVFHMIN32-NEXT: add a4, t0, a4 +; ZVFHMIN32-NEXT: add a4, sp, a4 +; ZVFHMIN32-NEXT: addi a4, a4, 880 +; ZVFHMIN32-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN32-NEXT: vmv.x.s s10, v6 +; ZVFHMIN32-NEXT: csrr a4, vlenb +; ZVFHMIN32-NEXT: slli t0, a4, 3 +; ZVFHMIN32-NEXT: sub a4, t0, a4 +; ZVFHMIN32-NEXT: add a4, sp, a4 +; ZVFHMIN32-NEXT: addi a4, a4, 880 +; ZVFHMIN32-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN32-NEXT: vmv.x.s s11, v6 +; ZVFHMIN32-NEXT: csrr a4, vlenb +; ZVFHMIN32-NEXT: slli t0, a4, 2 +; ZVFHMIN32-NEXT: add a4, t0, a4 +; ZVFHMIN32-NEXT: add a4, sp, a4 +; ZVFHMIN32-NEXT: addi a4, a4, 880 +; ZVFHMIN32-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN32-NEXT: vmv.x.s s5, v6 +; ZVFHMIN32-NEXT: csrr a4, vlenb +; ZVFHMIN32-NEXT: slli t0, a4, 1 +; ZVFHMIN32-NEXT: add a4, t0, a4 +; ZVFHMIN32-NEXT: add a4, sp, a4 +; ZVFHMIN32-NEXT: addi a4, a4, 880 +; ZVFHMIN32-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN32-NEXT: vmv.x.s s7, v6 +; ZVFHMIN32-NEXT: addi a4, sp, 880 +; ZVFHMIN32-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN32-NEXT: vmv.x.s s9, v6 +; ZVFHMIN32-NEXT: vmv.x.s s8, v4 +; ZVFHMIN32-NEXT: vmv.x.s s6, v2 +; ZVFHMIN32-NEXT: vmv.x.s s4, v24 +; ZVFHMIN32-NEXT: vmv.x.s s2, v22 +; ZVFHMIN32-NEXT: vmv.x.s a4, v20 +; ZVFHMIN32-NEXT: vmv.x.s t0, v18 +; ZVFHMIN32-NEXT: sw t0, 120(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: vmv.x.s t0, v16 +; ZVFHMIN32-NEXT: sw t0, 124(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: vmv.x.s t6, v8 +; ZVFHMIN32-NEXT: vmv.x.s t0, v10 +; ZVFHMIN32-NEXT: vmv.x.s t1, v12 +; ZVFHMIN32-NEXT: vmv.x.s t2, v14 +; ZVFHMIN32-NEXT: vmv.x.s t3, v26 +; ZVFHMIN32-NEXT: vmv.x.s t4, v28 +; ZVFHMIN32-NEXT: vmv.x.s t5, v30 +; ZVFHMIN32-NEXT: fmv.h.x fs8, a2 +; ZVFHMIN32-NEXT: fmv.h.x fs7, a5 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: slli a2, a2, 5 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x fs6, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: slli a5, a2, 5 +; ZVFHMIN32-NEXT: sub a2, a5, a2 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x fs5, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: li a5, 30 +; ZVFHMIN32-NEXT: mul a2, a2, a5 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x ft10, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: li a5, 29 +; ZVFHMIN32-NEXT: mul a2, a2, a5 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x ft8, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; 
ZVFHMIN32-NEXT: li a5, 28 +; ZVFHMIN32-NEXT: mul a2, a2, a5 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x ft2, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: li a5, 27 +; ZVFHMIN32-NEXT: mul a2, a2, a5 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x ft3, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: li a5, 26 +; ZVFHMIN32-NEXT: mul a2, a2, a5 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x ft4, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: li a5, 11 +; ZVFHMIN32-NEXT: mul a2, a2, a5 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x ft5, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: li a5, 12 +; ZVFHMIN32-NEXT: mul a2, a2, a5 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x ft6, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: li a5, 13 +; ZVFHMIN32-NEXT: mul a2, a2, a5 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x fa6, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: li a5, 14 +; ZVFHMIN32-NEXT: mul a2, a2, a5 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x fs0, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: slli a5, a2, 4 +; ZVFHMIN32-NEXT: sub a2, a5, a2 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x fs1, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: slli a2, a2, 4 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x fs2, a2 +; ZVFHMIN32-NEXT: csrr a2, vlenb +; ZVFHMIN32-NEXT: slli a5, a2, 4 +; ZVFHMIN32-NEXT: add a2, a5, a2 +; ZVFHMIN32-NEXT: add a2, sp, a2 +; ZVFHMIN32-NEXT: lh a2, 880(a2) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x fs3, a2 +; ZVFHMIN32-NEXT: addi a2, sp, 256 +; ZVFHMIN32-NEXT: fmv.h.x fs4, a0 +; ZVFHMIN32-NEXT: fmv.h.x ft7, a3 +; ZVFHMIN32-NEXT: fmv.h.x ft11, a7 +; ZVFHMIN32-NEXT: fmv.h.x ft9, s3 +; ZVFHMIN32-NEXT: fmv.h.x fa7, s10 +; ZVFHMIN32-NEXT: fmv.h.x fa5, s11 +; ZVFHMIN32-NEXT: fsh fa5, 114(sp) # 2-byte Folded Spill +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: slli a3, a0, 5 +; ZVFHMIN32-NEXT: add a0, a3, a0 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: addi a0, a0, 880 +; ZVFHMIN32-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN32-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; ZVFHMIN32-NEXT: vse16.v v24, (a2) +; ZVFHMIN32-NEXT: vmv.x.s a3, v0 +; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 15 +; ZVFHMIN32-NEXT: vmv.x.s a5, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 14 +; ZVFHMIN32-NEXT: vmv.x.s ra, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 13 +; ZVFHMIN32-NEXT: vmv.x.s a2, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 12 +; ZVFHMIN32-NEXT: vmv.x.s a1, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 11 +; ZVFHMIN32-NEXT: vmv.x.s s3, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 10 +; ZVFHMIN32-NEXT: vmv.x.s a7, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 9 +; ZVFHMIN32-NEXT: vmv.x.s a0, v8 +; ZVFHMIN32-NEXT: sw a0, 116(sp) # 4-byte Folded Spill +; ZVFHMIN32-NEXT: fmv.h.x fa3, s5 +; 
ZVFHMIN32-NEXT: vmv.x.s s5, v5 +; ZVFHMIN32-NEXT: fmv.h.x fa2, s7 +; ZVFHMIN32-NEXT: vmv.x.s s7, v17 +; ZVFHMIN32-NEXT: fmv.h.x fa1, s9 +; ZVFHMIN32-NEXT: vmv.x.s s9, v23 +; ZVFHMIN32-NEXT: fmv.h.x fa0, s8 +; ZVFHMIN32-NEXT: vmv.x.s s8, v19 +; ZVFHMIN32-NEXT: fmv.h.x ft0, s6 +; ZVFHMIN32-NEXT: vmv.x.s s6, v21 +; ZVFHMIN32-NEXT: fmv.h.x ft1, s4 +; ZVFHMIN32-NEXT: vmv.x.s s10, v3 +; ZVFHMIN32-NEXT: fmv.h.x fa5, s2 +; ZVFHMIN32-NEXT: fsh fa5, 112(sp) # 2-byte Folded Spill +; ZVFHMIN32-NEXT: vmv.x.s s2, v24 +; ZVFHMIN32-NEXT: fmv.h.x fs9, a6 +; ZVFHMIN32-NEXT: csrr a0, vlenb +; ZVFHMIN32-NEXT: slli a0, a0, 1 +; ZVFHMIN32-NEXT: add a0, sp, a0 +; ZVFHMIN32-NEXT: lh a6, 880(a0) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 7 +; ZVFHMIN32-NEXT: fmv.h.x fs10, s2 +; ZVFHMIN32-NEXT: vmv.x.s a0, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 6 +; ZVFHMIN32-NEXT: fmv.h.x fs11, s5 +; ZVFHMIN32-NEXT: feq.h s2, fs8, fs9 +; ZVFHMIN32-NEXT: fmv.h.x fs8, s7 +; ZVFHMIN32-NEXT: vmv.x.s s7, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 5 +; ZVFHMIN32-NEXT: fmv.h.x fs9, s9 +; ZVFHMIN32-NEXT: feq.h s11, fs7, fs10 +; ZVFHMIN32-NEXT: fmv.h.x fs7, s8 +; ZVFHMIN32-NEXT: vmv.x.s s8, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 4 +; ZVFHMIN32-NEXT: fmv.h.x fs10, s6 +; ZVFHMIN32-NEXT: feq.h s4, fs6, fs11 +; ZVFHMIN32-NEXT: fmv.h.x fs6, s10 +; ZVFHMIN32-NEXT: vmv.x.s s9, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 3 +; ZVFHMIN32-NEXT: fmv.h.x fs11, a6 +; ZVFHMIN32-NEXT: feq.h s5, fs5, fs8 +; ZVFHMIN32-NEXT: fmv.h.x fs5, a0 +; ZVFHMIN32-NEXT: vmv.x.s a0, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 2 +; ZVFHMIN32-NEXT: fmv.h.x fs8, s7 +; ZVFHMIN32-NEXT: feq.h s6, ft10, fs9 +; ZVFHMIN32-NEXT: fmv.h.x fs9, s8 +; ZVFHMIN32-NEXT: vmv.x.s a6, v8 +; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 1 +; ZVFHMIN32-NEXT: fmv.h.x fa5, s9 +; ZVFHMIN32-NEXT: feq.h s7, ft8, fs7 +; ZVFHMIN32-NEXT: fmv.h.x fs7, a0 +; ZVFHMIN32-NEXT: vmv.x.s a0, v8 +; ZVFHMIN32-NEXT: fmv.h.x fa4, a6 +; ZVFHMIN32-NEXT: feq.h s8, ft2, fs10 +; ZVFHMIN32-NEXT: fmv.h.x fs10, a0 +; ZVFHMIN32-NEXT: feq.h s9, ft3, fs6 +; ZVFHMIN32-NEXT: fmv.h.x fs6, t6 +; ZVFHMIN32-NEXT: feq.h s10, ft4, fs11 +; ZVFHMIN32-NEXT: fmv.h.x fs11, t0 +; ZVFHMIN32-NEXT: feq.h t0, ft5, fs5 +; ZVFHMIN32-NEXT: fmv.h.x fs5, t1 +; ZVFHMIN32-NEXT: feq.h t1, ft6, fs8 +; ZVFHMIN32-NEXT: fmv.h.x ft10, t2 +; ZVFHMIN32-NEXT: feq.h t2, fa6, fs9 +; ZVFHMIN32-NEXT: fmv.h.x ft8, t3 +; ZVFHMIN32-NEXT: feq.h t3, fs0, fa5 +; ZVFHMIN32-NEXT: fmv.h.x ft2, t4 +; ZVFHMIN32-NEXT: feq.h t4, fs1, fs7 +; ZVFHMIN32-NEXT: fmv.h.x ft3, t5 +; ZVFHMIN32-NEXT: feq.h t5, fs2, fa4 +; ZVFHMIN32-NEXT: fmv.h.x ft4, a3 +; ZVFHMIN32-NEXT: feq.h t6, fs3, fs10 +; ZVFHMIN32-NEXT: fmv.h.x ft5, a5 +; ZVFHMIN32-NEXT: feq.h a0, fs4, fs6 +; ZVFHMIN32-NEXT: fmv.h.x ft6, ra +; ZVFHMIN32-NEXT: feq.h a5, ft7, fs11 +; ZVFHMIN32-NEXT: fmv.h.x ft7, a2 +; ZVFHMIN32-NEXT: lh a2, 704(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa6, a1 +; ZVFHMIN32-NEXT: feq.h a6, ft11, fs5 +; ZVFHMIN32-NEXT: fmv.h.x fa5, a2 ; ZVFHMIN32-NEXT: lh a1, 448(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 160(sp) -; ZVFHMIN32-NEXT: lh a0, 702(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 160(sp) +; ZVFHMIN32-NEXT: lh a1, 702(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 446(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; 
ZVFHMIN32-NEXT: sb a0, 159(sp) -; ZVFHMIN32-NEXT: lh a0, 700(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 159(sp) +; ZVFHMIN32-NEXT: lh a1, 700(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 444(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 158(sp) -; ZVFHMIN32-NEXT: lh a0, 698(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 158(sp) +; ZVFHMIN32-NEXT: lh a1, 698(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 442(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 157(sp) -; ZVFHMIN32-NEXT: lh a0, 696(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 157(sp) +; ZVFHMIN32-NEXT: lh a1, 696(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 440(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 156(sp) -; ZVFHMIN32-NEXT: lh a0, 694(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 156(sp) +; ZVFHMIN32-NEXT: lh a1, 694(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 438(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 155(sp) -; ZVFHMIN32-NEXT: lh a0, 692(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 155(sp) +; ZVFHMIN32-NEXT: lh a1, 692(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 436(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 154(sp) -; ZVFHMIN32-NEXT: lh a0, 690(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 154(sp) +; ZVFHMIN32-NEXT: lh a1, 690(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 434(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 153(sp) -; ZVFHMIN32-NEXT: lh a0, 688(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 153(sp) +; ZVFHMIN32-NEXT: lh a1, 688(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 432(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 152(sp) -; ZVFHMIN32-NEXT: lh a0, 686(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 152(sp) +; ZVFHMIN32-NEXT: lh a1, 686(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 430(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 151(sp) -; ZVFHMIN32-NEXT: lh a0, 684(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 151(sp) +; ZVFHMIN32-NEXT: lh a1, 684(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 428(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 150(sp) -; ZVFHMIN32-NEXT: lh a0, 682(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 150(sp) +; ZVFHMIN32-NEXT: lh a1, 682(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 426(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 149(sp) -; ZVFHMIN32-NEXT: lh a0, 680(sp) 
+; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 149(sp) +; ZVFHMIN32-NEXT: lh a1, 680(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 424(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 148(sp) -; ZVFHMIN32-NEXT: lh a0, 678(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 148(sp) +; ZVFHMIN32-NEXT: lh a1, 678(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 422(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 147(sp) -; ZVFHMIN32-NEXT: lh a0, 676(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 147(sp) +; ZVFHMIN32-NEXT: lh a1, 676(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 420(sp) -; ZVFHMIN32-NEXT: vmv.x.s a2, v8 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 146(sp) -; ZVFHMIN32-NEXT: lh a0, 674(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 146(sp) +; ZVFHMIN32-NEXT: lh a1, 674(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 418(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a2 -; ZVFHMIN32-NEXT: vmv.x.s a2, v0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa4, fa3 -; ZVFHMIN32-NEXT: sb a0, 145(sp) -; ZVFHMIN32-NEXT: lh a0, 672(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 145(sp) +; ZVFHMIN32-NEXT: lh a1, 672(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 416(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN32-NEXT: feq.h a2, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a2, 128(sp) -; ZVFHMIN32-NEXT: sb a0, 144(sp) -; ZVFHMIN32-NEXT: lh a0, 576(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb s2, 128(sp) +; ZVFHMIN32-NEXT: feq.h s2, ft9, ft10 +; ZVFHMIN32-NEXT: sb a1, 144(sp) +; ZVFHMIN32-NEXT: lh a1, 576(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 320(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 224(sp) -; ZVFHMIN32-NEXT: lh a0, 574(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 224(sp) +; ZVFHMIN32-NEXT: lh a1, 574(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 318(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 223(sp) -; ZVFHMIN32-NEXT: lh a0, 572(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 223(sp) +; ZVFHMIN32-NEXT: lh a1, 572(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 316(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 222(sp) -; ZVFHMIN32-NEXT: lh a0, 570(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 222(sp) +; ZVFHMIN32-NEXT: lh a1, 570(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 314(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 221(sp) -; ZVFHMIN32-NEXT: lh a0, 568(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 221(sp) +; ZVFHMIN32-NEXT: lh 
a1, 568(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 312(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 220(sp) -; ZVFHMIN32-NEXT: lh a0, 566(sp) -; ZVFHMIN32-NEXT: lh a1, 310(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 220(sp) +; ZVFHMIN32-NEXT: lh a1, 566(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 310(sp) ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 219(sp) -; ZVFHMIN32-NEXT: lh a0, 564(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 219(sp) +; ZVFHMIN32-NEXT: lh a1, 564(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 308(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 218(sp) -; ZVFHMIN32-NEXT: lh a0, 562(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 218(sp) +; ZVFHMIN32-NEXT: lh a1, 562(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 306(sp) -; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 7 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 29 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 6 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 28 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 5 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 27 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 4 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 26 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 3 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 25 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 2 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 24 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 1 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 23 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m2, ta, ma -; ZVFHMIN32-NEXT: vslidedown.vi v26, v8, 15 -; ZVFHMIN32-NEXT: vslidedown.vi v20, v8, 14 -; ZVFHMIN32-NEXT: vslidedown.vi v28, v8, 13 -; ZVFHMIN32-NEXT: vslidedown.vi v10, v8, 12 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: slli a2, a2, 1 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs2r.v v10, (a2) # 
Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v4, v8, 11 -; ZVFHMIN32-NEXT: vslidedown.vi v2, v8, 10 -; ZVFHMIN32-NEXT: vslidedown.vi v30, v8, 9 -; ZVFHMIN32-NEXT: vslidedown.vi v22, v8, 8 -; ZVFHMIN32-NEXT: vmv.x.s a4, v16 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 217(sp) -; ZVFHMIN32-NEXT: lh a0, 560(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 217(sp) +; ZVFHMIN32-NEXT: lh a1, 560(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 304(sp) -; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; ZVFHMIN32-NEXT: vslidedown.vi v3, v16, 7 -; ZVFHMIN32-NEXT: vslidedown.vi v31, v16, 6 -; ZVFHMIN32-NEXT: vslidedown.vi v5, v16, 5 -; ZVFHMIN32-NEXT: vslidedown.vi v23, v16, 4 -; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 3 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 21 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 2 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 20 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 1 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 22 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m2, ta, ma -; ZVFHMIN32-NEXT: vslidedown.vi v18, v16, 15 -; ZVFHMIN32-NEXT: vslidedown.vi v14, v16, 14 -; ZVFHMIN32-NEXT: vslidedown.vi v8, v16, 13 -; ZVFHMIN32-NEXT: vslidedown.vi v12, v16, 12 -; ZVFHMIN32-NEXT: vslidedown.vi v10, v16, 11 -; ZVFHMIN32-NEXT: vslidedown.vi v6, v16, 10 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 18 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v6, v16, 9 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 14 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v6, v16, 8 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 216(sp) -; ZVFHMIN32-NEXT: lh a0, 558(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 216(sp) +; ZVFHMIN32-NEXT: lh a1, 558(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 302(sp) -; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; ZVFHMIN32-NEXT: vslidedown.vi v13, v0, 7 -; ZVFHMIN32-NEXT: vslidedown.vi v29, v0, 6 -; ZVFHMIN32-NEXT: vslidedown.vi v11, v0, 5 -; ZVFHMIN32-NEXT: vslidedown.vi v7, v0, 4 -; ZVFHMIN32-NEXT: vslidedown.vi v9, v0, 3 -; ZVFHMIN32-NEXT: vslidedown.vi v21, v0, 2 -; ZVFHMIN32-NEXT: vslidedown.vi v27, v0, 1 -; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m2, ta, ma -; ZVFHMIN32-NEXT: vslidedown.vi v16, v0, 15 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: slli a2, a2, 2 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi 
v16, v0, 14 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: slli a2, a2, 3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v16, v0, 13 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 6 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v16, v0, 12 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 12 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v16, v0, 11 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 10 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v16, v0, 10 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: slli a2, a2, 4 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vslidedown.vi v16, v0, 9 -; ZVFHMIN32-NEXT: vslidedown.vi v0, v0, 8 -; ZVFHMIN32-NEXT: addi a2, sp, 848 -; ZVFHMIN32-NEXT: vs2r.v v0, (a2) # Unknown-size Folded Spill -; ZVFHMIN32-NEXT: vmv.x.s t4, v26 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 215(sp) -; ZVFHMIN32-NEXT: lh a0, 556(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 215(sp) +; ZVFHMIN32-NEXT: lh a1, 556(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 300(sp) -; ZVFHMIN32-NEXT: vmv.x.s t3, v20 -; ZVFHMIN32-NEXT: vmv.x.s t1, v28 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 214(sp) -; ZVFHMIN32-NEXT: lh a0, 554(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 214(sp) +; ZVFHMIN32-NEXT: lh a1, 554(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 298(sp) -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: slli a2, a2, 1 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vl2r.v v0, (a2) # Unknown-size Folded Reload -; ZVFHMIN32-NEXT: vmv.x.s t2, v0 -; ZVFHMIN32-NEXT: vmv.x.s t0, v4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 213(sp) -; ZVFHMIN32-NEXT: lh a0, 552(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 213(sp) +; ZVFHMIN32-NEXT: lh a1, 552(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 296(sp) -; ZVFHMIN32-NEXT: vmv.x.s a7, v2 -; ZVFHMIN32-NEXT: vmv.x.s a6, v30 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 212(sp) -; ZVFHMIN32-NEXT: lh a0, 550(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 212(sp) +; ZVFHMIN32-NEXT: lh a1, 550(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 294(sp) -; ZVFHMIN32-NEXT: vmv.x.s a5, v22 -; ZVFHMIN32-NEXT: vmv.x.s a2, v18 -; ZVFHMIN32-NEXT: sw a2, 112(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; 
ZVFHMIN32-NEXT: sb a0, 211(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 211(sp) ; ZVFHMIN32-NEXT: lh a1, 548(sp) -; ZVFHMIN32-NEXT: lh t5, 292(sp) -; ZVFHMIN32-NEXT: vmv.x.s a0, v14 -; ZVFHMIN32-NEXT: sw a0, 116(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: vmv.x.s a0, v8 -; ZVFHMIN32-NEXT: sw a0, 124(sp) # 4-byte Folded Spill ; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 -; ZVFHMIN32-NEXT: fmv.h.x fa4, t5 +; ZVFHMIN32-NEXT: lh a1, 292(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 ; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 ; ZVFHMIN32-NEXT: sb a1, 210(sp) ; ZVFHMIN32-NEXT: lh a1, 546(sp) -; ZVFHMIN32-NEXT: lh t5, 290(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a4 -; ZVFHMIN32-NEXT: vmv.x.s a4, v24 +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 290(sp) ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: fmv.h.x fa3, t5 -; ZVFHMIN32-NEXT: feq.h a1, fa4, fa3 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 ; ZVFHMIN32-NEXT: sb a1, 209(sp) ; ZVFHMIN32-NEXT: lh a1, 544(sp) -; ZVFHMIN32-NEXT: lh t5, 288(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, a4 -; ZVFHMIN32-NEXT: feq.h a4, fa5, fa4 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 -; ZVFHMIN32-NEXT: fmv.h.x fa4, t5 +; ZVFHMIN32-NEXT: lh a1, 288(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 ; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 -; ZVFHMIN32-NEXT: sb a4, 192(sp) +; ZVFHMIN32-NEXT: sb s11, 192(sp) +; ZVFHMIN32-NEXT: feq.h s11, fa7, ft8 ; ZVFHMIN32-NEXT: sb a1, 208(sp) -; ZVFHMIN32-NEXT: lh t5, 738(sp) -; ZVFHMIN32-NEXT: lh t6, 482(sp) -; ZVFHMIN32-NEXT: vmv.x.s a0, v12 -; ZVFHMIN32-NEXT: sw a0, 108(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: vmv.x.s a0, v10 -; ZVFHMIN32-NEXT: sw a0, 120(sp) # 4-byte Folded Spill -; ZVFHMIN32-NEXT: fmv.h.x fa5, t5 -; ZVFHMIN32-NEXT: fmv.h.x fa4, t6 -; ZVFHMIN32-NEXT: feq.h t5, fa5, fa4 -; ZVFHMIN32-NEXT: sb t5, 177(sp) -; ZVFHMIN32-NEXT: lh t5, 736(sp) -; ZVFHMIN32-NEXT: lh t6, 480(sp) -; ZVFHMIN32-NEXT: csrr a0, vlenb -; ZVFHMIN32-NEXT: li a1, 29 -; ZVFHMIN32-NEXT: mul a0, a0, a1 -; ZVFHMIN32-NEXT: add a0, sp, a0 -; ZVFHMIN32-NEXT: lh s5, 848(a0) # 8-byte Folded Reload -; ZVFHMIN32-NEXT: csrr a0, vlenb -; ZVFHMIN32-NEXT: li a1, 28 -; ZVFHMIN32-NEXT: mul a0, a0, a1 -; ZVFHMIN32-NEXT: add a0, sp, a0 -; ZVFHMIN32-NEXT: lh s6, 848(a0) # 8-byte Folded Reload -; ZVFHMIN32-NEXT: fmv.h.x fa5, t5 -; ZVFHMIN32-NEXT: fmv.h.x fa4, t6 -; ZVFHMIN32-NEXT: feq.h t5, fa5, fa4 -; ZVFHMIN32-NEXT: sb t5, 176(sp) -; ZVFHMIN32-NEXT: lh t5, 734(sp) -; ZVFHMIN32-NEXT: lh t6, 478(sp) -; ZVFHMIN32-NEXT: csrr a0, vlenb -; ZVFHMIN32-NEXT: li a1, 27 -; ZVFHMIN32-NEXT: mul a0, a0, a1 -; ZVFHMIN32-NEXT: add a0, sp, a0 -; ZVFHMIN32-NEXT: lh s7, 848(a0) # 8-byte Folded Reload -; ZVFHMIN32-NEXT: csrr a0, vlenb -; ZVFHMIN32-NEXT: li a1, 26 -; ZVFHMIN32-NEXT: mul a0, a0, a1 -; ZVFHMIN32-NEXT: add a0, sp, a0 -; ZVFHMIN32-NEXT: lh s8, 848(a0) # 8-byte Folded Reload -; ZVFHMIN32-NEXT: fmv.h.x fa5, t5 -; ZVFHMIN32-NEXT: fmv.h.x fa4, t6 -; ZVFHMIN32-NEXT: feq.h t5, fa5, fa4 -; ZVFHMIN32-NEXT: sb t5, 175(sp) -; ZVFHMIN32-NEXT: lh t5, 732(sp) -; ZVFHMIN32-NEXT: lh t6, 476(sp) -; ZVFHMIN32-NEXT: csrr a0, vlenb -; ZVFHMIN32-NEXT: li a1, 25 -; ZVFHMIN32-NEXT: mul a0, a0, a1 -; ZVFHMIN32-NEXT: add a0, sp, a0 -; ZVFHMIN32-NEXT: lh s4, 848(a0) # 8-byte Folded Reload -; ZVFHMIN32-NEXT: csrr a0, vlenb -; ZVFHMIN32-NEXT: li a1, 24 -; ZVFHMIN32-NEXT: mul a0, a0, a1 -; ZVFHMIN32-NEXT: add a0, sp, a0 -; ZVFHMIN32-NEXT: lh s3, 848(a0) # 8-byte Folded Reload -; ZVFHMIN32-NEXT: fmv.h.x fa5, t5 -; ZVFHMIN32-NEXT: fmv.h.x fa4, t6 -; ZVFHMIN32-NEXT: feq.h t5, fa5, fa4 -; ZVFHMIN32-NEXT: sb t5, 174(sp) -; 
ZVFHMIN32-NEXT: lh t6, 730(sp) -; ZVFHMIN32-NEXT: lh s9, 474(sp) -; ZVFHMIN32-NEXT: csrr a0, vlenb -; ZVFHMIN32-NEXT: li a1, 23 -; ZVFHMIN32-NEXT: mul a0, a0, a1 -; ZVFHMIN32-NEXT: add a0, sp, a0 -; ZVFHMIN32-NEXT: lh s2, 848(a0) # 8-byte Folded Reload -; ZVFHMIN32-NEXT: vmv.x.s t5, v3 -; ZVFHMIN32-NEXT: fmv.h.x fa5, t6 -; ZVFHMIN32-NEXT: fmv.h.x fa4, s9 -; ZVFHMIN32-NEXT: feq.h t6, fa5, fa4 -; ZVFHMIN32-NEXT: sb t6, 173(sp) -; ZVFHMIN32-NEXT: lh s9, 728(sp) -; ZVFHMIN32-NEXT: lh s10, 472(sp) -; ZVFHMIN32-NEXT: vmv.x.s t6, v31 -; ZVFHMIN32-NEXT: vmv.x.s ra, v13 -; ZVFHMIN32-NEXT: fmv.h.x fa5, s9 -; ZVFHMIN32-NEXT: fmv.h.x fa4, s10 -; ZVFHMIN32-NEXT: feq.h s9, fa5, fa4 -; ZVFHMIN32-NEXT: sb s9, 172(sp) -; ZVFHMIN32-NEXT: lh s9, 726(sp) -; ZVFHMIN32-NEXT: lh s10, 470(sp) -; ZVFHMIN32-NEXT: vmv.x.s a2, v29 -; ZVFHMIN32-NEXT: vmv.x.s a3, v11 -; ZVFHMIN32-NEXT: fmv.h.x fa5, s9 -; ZVFHMIN32-NEXT: fmv.h.x fa4, s10 -; ZVFHMIN32-NEXT: feq.h s9, fa5, fa4 -; ZVFHMIN32-NEXT: sb s9, 171(sp) -; ZVFHMIN32-NEXT: lh s10, 724(sp) -; ZVFHMIN32-NEXT: lh s11, 468(sp) -; ZVFHMIN32-NEXT: vmv.x.s a4, v7 -; ZVFHMIN32-NEXT: vmv.x.s s9, v9 -; ZVFHMIN32-NEXT: fmv.h.x fa5, s10 -; ZVFHMIN32-NEXT: fmv.h.x fa4, s11 -; ZVFHMIN32-NEXT: feq.h s10, fa5, fa4 -; ZVFHMIN32-NEXT: sb s10, 170(sp) -; ZVFHMIN32-NEXT: lh a0, 722(sp) +; ZVFHMIN32-NEXT: lh a1, 738(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 482(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 177(sp) +; ZVFHMIN32-NEXT: lh a1, 736(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 480(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 176(sp) +; ZVFHMIN32-NEXT: lh a1, 734(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 478(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 175(sp) +; ZVFHMIN32-NEXT: lh a1, 732(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 476(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 174(sp) +; ZVFHMIN32-NEXT: lh a1, 730(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 474(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 173(sp) +; ZVFHMIN32-NEXT: lh a1, 728(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 472(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 172(sp) +; ZVFHMIN32-NEXT: lh a1, 726(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 470(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 171(sp) +; ZVFHMIN32-NEXT: lh a1, 724(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 468(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 170(sp) +; ZVFHMIN32-NEXT: lh a1, 722(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 466(sp) -; ZVFHMIN32-NEXT: vmv.x.s s10, v21 -; ZVFHMIN32-NEXT: vmv.x.s s11, v27 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 169(sp) -; ZVFHMIN32-NEXT: lh a0, 720(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 169(sp) +; ZVFHMIN32-NEXT: lh a1, 720(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 464(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, s5 -; 
ZVFHMIN32-NEXT: fmv.h.x fa4, s6 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN32-NEXT: sb a0, 168(sp) -; ZVFHMIN32-NEXT: lh a0, 718(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 168(sp) +; ZVFHMIN32-NEXT: lh a1, 718(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 462(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa3, s7 -; ZVFHMIN32-NEXT: fmv.h.x fa2, s8 -; ZVFHMIN32-NEXT: fmv.h.x fa1, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa0, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa1, fa0 -; ZVFHMIN32-NEXT: fmv.h.x fa1, ra -; ZVFHMIN32-NEXT: sb a0, 167(sp) -; ZVFHMIN32-NEXT: lh a0, 716(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa0, a2 +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 167(sp) +; ZVFHMIN32-NEXT: lh a1, 716(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 460(sp) -; ZVFHMIN32-NEXT: feq.h s5, fa5, fa1 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: feq.h a0, fa4, fa0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 ; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, s4 ; ZVFHMIN32-NEXT: sb a1, 166(sp) ; ZVFHMIN32-NEXT: lh a1, 714(sp) -; ZVFHMIN32-NEXT: lh a2, 458(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, a3 -; ZVFHMIN32-NEXT: feq.h a3, fa3, fa4 +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 458(sp) ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a2 -; ZVFHMIN32-NEXT: feq.h a1, fa4, fa3 -; ZVFHMIN32-NEXT: fmv.h.x fa4, s3 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 ; ZVFHMIN32-NEXT: sb a1, 165(sp) ; ZVFHMIN32-NEXT: lh a1, 712(sp) -; ZVFHMIN32-NEXT: lh a2, 456(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa3, a4 -; ZVFHMIN32-NEXT: feq.h a4, fa2, fa3 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a1 -; ZVFHMIN32-NEXT: fmv.h.x fa2, a2 -; ZVFHMIN32-NEXT: feq.h a1, fa3, fa2 -; ZVFHMIN32-NEXT: fmv.h.x fa3, s2 +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 456(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 ; ZVFHMIN32-NEXT: sb a1, 164(sp) ; ZVFHMIN32-NEXT: lh a1, 710(sp) -; ZVFHMIN32-NEXT: lh a2, 454(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa2, s9 -; ZVFHMIN32-NEXT: feq.h s2, fa5, fa2 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 -; ZVFHMIN32-NEXT: fmv.h.x fa2, a2 -; ZVFHMIN32-NEXT: feq.h a1, fa5, fa2 -; ZVFHMIN32-NEXT: fmv.h.x fa5, s10 -; ZVFHMIN32-NEXT: fmv.h.x fa2, s11 +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 454(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 ; ZVFHMIN32-NEXT: sb a1, 163(sp) ; ZVFHMIN32-NEXT: lh a1, 708(sp) -; ZVFHMIN32-NEXT: lh a2, 452(sp) -; ZVFHMIN32-NEXT: feq.h s3, fa4, fa5 -; ZVFHMIN32-NEXT: feq.h s4, fa3, fa2 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 +; ZVFHMIN32-NEXT: lh a1, 452(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 ; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: fmv.h.x fa5, s3 ; ZVFHMIN32-NEXT: sb a1, 162(sp) ; ZVFHMIN32-NEXT: lh a1, 706(sp) ; ZVFHMIN32-NEXT: lh a2, 450(sp) -; ZVFHMIN32-NEXT: sb s4, 129(sp) -; ZVFHMIN32-NEXT: sb s3, 130(sp) -; ZVFHMIN32-NEXT: sb s2, 131(sp) -; ZVFHMIN32-NEXT: sb a4, 132(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 -; ZVFHMIN32-NEXT: sb a3, 133(sp) -; ZVFHMIN32-NEXT: sb a0, 134(sp) -; ZVFHMIN32-NEXT: sb s5, 135(sp) +; ZVFHMIN32-NEXT: sb s10, 129(sp) +; ZVFHMIN32-NEXT: flh fa4, 114(sp) # 2-byte Folded Reload +; ZVFHMIN32-NEXT: feq.h s10, fa4, ft2 +; ZVFHMIN32-NEXT: sb s9, 130(sp) +; ZVFHMIN32-NEXT: 
feq.h s9, fa3, ft3 +; ZVFHMIN32-NEXT: sb s8, 131(sp) +; ZVFHMIN32-NEXT: feq.h ra, fa2, ft4 +; ZVFHMIN32-NEXT: sb s7, 132(sp) +; ZVFHMIN32-NEXT: feq.h s3, fa1, ft5 +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h s7, fa0, ft6 +; ZVFHMIN32-NEXT: fmv.h.x fa3, a2 +; ZVFHMIN32-NEXT: feq.h s8, ft0, ft7 +; ZVFHMIN32-NEXT: sb s6, 133(sp) +; ZVFHMIN32-NEXT: feq.h s6, ft1, fa6 +; ZVFHMIN32-NEXT: sb s5, 134(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa4, fa3 +; ZVFHMIN32-NEXT: sb s4, 135(sp) +; ZVFHMIN32-NEXT: flh fa4, 112(sp) # 2-byte Folded Reload +; ZVFHMIN32-NEXT: feq.h s4, fa4, fa5 ; ZVFHMIN32-NEXT: sb a1, 161(sp) -; ZVFHMIN32-NEXT: lh a0, 610(sp) +; ZVFHMIN32-NEXT: lh a1, 610(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 354(sp) -; ZVFHMIN32-NEXT: vmv.x.s s6, v5 -; ZVFHMIN32-NEXT: vmv.x.s s5, v23 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 241(sp) -; ZVFHMIN32-NEXT: lh a0, 608(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 241(sp) +; ZVFHMIN32-NEXT: lh a1, 608(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 352(sp) -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 21 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: lh s4, 848(a2) # 8-byte Folded Reload -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 20 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: lh s3, 848(a2) # 8-byte Folded Reload -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 240(sp) -; ZVFHMIN32-NEXT: lh a0, 606(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 240(sp) +; ZVFHMIN32-NEXT: lh a1, 606(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 350(sp) -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 22 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: lh s2, 848(a2) # 8-byte Folded Reload -; ZVFHMIN32-NEXT: fmv.h.x fa5, t5 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa4, fa3 -; ZVFHMIN32-NEXT: sb a0, 239(sp) -; ZVFHMIN32-NEXT: lh a0, 604(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 239(sp) +; ZVFHMIN32-NEXT: lh a1, 604(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 348(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, t6 -; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 7 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN32-NEXT: sb a0, 238(sp) -; ZVFHMIN32-NEXT: lh a0, 602(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 238(sp) +; ZVFHMIN32-NEXT: lh a1, 602(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 346(sp) -; ZVFHMIN32-NEXT: vmv.x.s a2, v8 -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 6 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN32-NEXT: sb a0, 237(sp) -; ZVFHMIN32-NEXT: lh a0, 600(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 237(sp) +; ZVFHMIN32-NEXT: lh a1, 600(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 344(sp) -; ZVFHMIN32-NEXT: vmv.x.s a3, v8 -; ZVFHMIN32-NEXT: vslidedown.vi v8, 
v24, 5 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN32-NEXT: sb a0, 236(sp) -; ZVFHMIN32-NEXT: lh a0, 598(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 236(sp) +; ZVFHMIN32-NEXT: lh a1, 598(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 342(sp) -; ZVFHMIN32-NEXT: vmv.x.s a4, v8 -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 4 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN32-NEXT: sb a0, 235(sp) -; ZVFHMIN32-NEXT: lh a0, 596(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 235(sp) +; ZVFHMIN32-NEXT: lh a1, 596(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 340(sp) -; ZVFHMIN32-NEXT: vmv.x.s s8, v8 -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 3 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN32-NEXT: sb a0, 234(sp) -; ZVFHMIN32-NEXT: lh a0, 594(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 234(sp) +; ZVFHMIN32-NEXT: lh a1, 594(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 338(sp) -; ZVFHMIN32-NEXT: vmv.x.s s9, v8 -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 2 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN32-NEXT: sb a0, 233(sp) -; ZVFHMIN32-NEXT: lh a0, 592(sp) -; ZVFHMIN32-NEXT: vmv.x.s a1, v8 -; ZVFHMIN32-NEXT: lh t5, 336(sp) -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 1 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN32-NEXT: vmv.x.s s7, v8 -; ZVFHMIN32-NEXT: fmv.h.x fa2, t5 -; ZVFHMIN32-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN32-NEXT: fmv.h.x fa3, a2 -; ZVFHMIN32-NEXT: sb a0, 232(sp) -; ZVFHMIN32-NEXT: lh a0, 590(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa2, a3 -; ZVFHMIN32-NEXT: lh a2, 334(sp) -; ZVFHMIN32-NEXT: feq.h t5, fa5, fa3 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: feq.h t6, fa4, fa2 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, s6 -; ZVFHMIN32-NEXT: sb a0, 231(sp) -; ZVFHMIN32-NEXT: lh a0, 588(sp) -; ZVFHMIN32-NEXT: lh a2, 332(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, a4 -; ZVFHMIN32-NEXT: feq.h a3, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, s5 -; ZVFHMIN32-NEXT: sb a0, 230(sp) -; ZVFHMIN32-NEXT: lh a0, 586(sp) -; ZVFHMIN32-NEXT: lh a2, 330(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, s8 -; ZVFHMIN32-NEXT: feq.h a4, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, s4 -; ZVFHMIN32-NEXT: sb a0, 229(sp) -; ZVFHMIN32-NEXT: lh a0, 584(sp) -; ZVFHMIN32-NEXT: lh a2, 328(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, s9 -; ZVFHMIN32-NEXT: feq.h s4, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, s3 -; ZVFHMIN32-NEXT: sb a0, 228(sp) -; ZVFHMIN32-NEXT: lh a0, 582(sp) -; ZVFHMIN32-NEXT: lh a2, 326(sp) ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 ; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, s2 -; ZVFHMIN32-NEXT: sb a0, 227(sp) -; ZVFHMIN32-NEXT: lh a0, 580(sp) 
-; ZVFHMIN32-NEXT: lh a2, 324(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, s7 -; ZVFHMIN32-NEXT: feq.h s2, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 226(sp) -; ZVFHMIN32-NEXT: lh a0, 578(sp) +; ZVFHMIN32-NEXT: sb a1, 233(sp) +; ZVFHMIN32-NEXT: lh a1, 592(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 336(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 232(sp) +; ZVFHMIN32-NEXT: lh a1, 590(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 334(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 231(sp) +; ZVFHMIN32-NEXT: lh a1, 588(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 332(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 230(sp) +; ZVFHMIN32-NEXT: lh a1, 586(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 330(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 229(sp) +; ZVFHMIN32-NEXT: lh a1, 584(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 328(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 228(sp) +; ZVFHMIN32-NEXT: lh a1, 582(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 326(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 227(sp) +; ZVFHMIN32-NEXT: lh a1, 580(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 324(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: fmv.h.x fa5, a4 +; ZVFHMIN32-NEXT: fmv.h.x fa4, a7 +; ZVFHMIN32-NEXT: sb a1, 226(sp) +; ZVFHMIN32-NEXT: lh a1, 578(sp) ; ZVFHMIN32-NEXT: lh a2, 322(sp) -; ZVFHMIN32-NEXT: sb s2, 193(sp) -; ZVFHMIN32-NEXT: sb a1, 194(sp) -; ZVFHMIN32-NEXT: sb s4, 195(sp) -; ZVFHMIN32-NEXT: sb a4, 196(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 +; ZVFHMIN32-NEXT: sb t6, 193(sp) +; ZVFHMIN32-NEXT: sb t5, 194(sp) +; ZVFHMIN32-NEXT: sb t4, 195(sp) +; ZVFHMIN32-NEXT: sb t3, 196(sp) +; ZVFHMIN32-NEXT: feq.h a4, fa5, fa4 +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a3, 197(sp) -; ZVFHMIN32-NEXT: sb t6, 198(sp) -; ZVFHMIN32-NEXT: sb t5, 199(sp) -; ZVFHMIN32-NEXT: sb a0, 225(sp) -; ZVFHMIN32-NEXT: lh a0, 766(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb t2, 197(sp) +; ZVFHMIN32-NEXT: sb t1, 198(sp) +; ZVFHMIN32-NEXT: sb t0, 199(sp) +; ZVFHMIN32-NEXT: sb a1, 225(sp) +; ZVFHMIN32-NEXT: lh a1, 766(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 510(sp) -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 18 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN32-NEXT: vmv.x.s s2, v8 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: li a3, 14 -; ZVFHMIN32-NEXT: mul a2, a2, a3 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN32-NEXT: vmv.x.s t6, v8 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 191(sp) -; ZVFHMIN32-NEXT: lh a0, 764(sp) +; ZVFHMIN32-NEXT: feq.h 
a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 191(sp) +; ZVFHMIN32-NEXT: lh a1, 764(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 508(sp) -; ZVFHMIN32-NEXT: vmv.x.s t5, v6 -; ZVFHMIN32-NEXT: csrr a2, vlenb -; ZVFHMIN32-NEXT: slli a2, a2, 2 -; ZVFHMIN32-NEXT: add a2, sp, a2 -; ZVFHMIN32-NEXT: addi a2, a2, 848 -; ZVFHMIN32-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN32-NEXT: vmv.x.s a2, v8 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 190(sp) -; ZVFHMIN32-NEXT: lh a0, 762(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 190(sp) +; ZVFHMIN32-NEXT: lh a1, 762(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 506(sp) -; ZVFHMIN32-NEXT: csrr a3, vlenb -; ZVFHMIN32-NEXT: slli a3, a3, 3 -; ZVFHMIN32-NEXT: add a3, sp, a3 -; ZVFHMIN32-NEXT: addi a3, a3, 848 -; ZVFHMIN32-NEXT: vl2r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN32-NEXT: vmv.x.s a3, v8 -; ZVFHMIN32-NEXT: csrr a4, vlenb -; ZVFHMIN32-NEXT: li s3, 6 -; ZVFHMIN32-NEXT: mul a4, a4, s3 -; ZVFHMIN32-NEXT: add a4, sp, a4 -; ZVFHMIN32-NEXT: addi a4, a4, 848 -; ZVFHMIN32-NEXT: vl2r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN32-NEXT: vmv.x.s a4, v8 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 189(sp) -; ZVFHMIN32-NEXT: lh a0, 760(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 189(sp) +; ZVFHMIN32-NEXT: lh a1, 760(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 504(sp) -; ZVFHMIN32-NEXT: csrr s3, vlenb -; ZVFHMIN32-NEXT: li s4, 12 -; ZVFHMIN32-NEXT: mul s3, s3, s4 -; ZVFHMIN32-NEXT: add s3, sp, s3 -; ZVFHMIN32-NEXT: addi s3, s3, 848 -; ZVFHMIN32-NEXT: vl2r.v v8, (s3) # Unknown-size Folded Reload -; ZVFHMIN32-NEXT: vmv.x.s s6, v8 -; ZVFHMIN32-NEXT: csrr s3, vlenb -; ZVFHMIN32-NEXT: li s4, 10 -; ZVFHMIN32-NEXT: mul s3, s3, s4 -; ZVFHMIN32-NEXT: add s3, sp, s3 -; ZVFHMIN32-NEXT: addi s3, s3, 848 -; ZVFHMIN32-NEXT: vl2r.v v8, (s3) # Unknown-size Folded Reload -; ZVFHMIN32-NEXT: vmv.x.s s4, v8 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 188(sp) -; ZVFHMIN32-NEXT: lh a0, 758(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 188(sp) +; ZVFHMIN32-NEXT: lh a1, 758(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 502(sp) -; ZVFHMIN32-NEXT: csrr s3, vlenb -; ZVFHMIN32-NEXT: slli s3, s3, 4 -; ZVFHMIN32-NEXT: add s3, sp, s3 -; ZVFHMIN32-NEXT: addi s3, s3, 848 -; ZVFHMIN32-NEXT: vl2r.v v8, (s3) # Unknown-size Folded Reload -; ZVFHMIN32-NEXT: vmv.x.s s5, v8 -; ZVFHMIN32-NEXT: vmv.x.s s3, v16 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, t4 -; ZVFHMIN32-NEXT: sb a0, 187(sp) -; ZVFHMIN32-NEXT: lh a0, 756(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 187(sp) +; ZVFHMIN32-NEXT: lh a1, 756(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 500(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN32-NEXT: feq.h t4, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, t3 -; ZVFHMIN32-NEXT: sb a0, 186(sp) -; ZVFHMIN32-NEXT: lh a0, 754(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 186(sp) +; ZVFHMIN32-NEXT: lh a1, 754(sp) +; 
ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 498(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, a3 -; ZVFHMIN32-NEXT: feq.h t3, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, t1 -; ZVFHMIN32-NEXT: sb a0, 185(sp) -; ZVFHMIN32-NEXT: lh a0, 752(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 185(sp) +; ZVFHMIN32-NEXT: lh a1, 752(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 496(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, a4 -; ZVFHMIN32-NEXT: feq.h t1, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, t2 -; ZVFHMIN32-NEXT: sb a0, 184(sp) -; ZVFHMIN32-NEXT: lh a0, 750(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 184(sp) +; ZVFHMIN32-NEXT: lh a1, 750(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 494(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, s6 -; ZVFHMIN32-NEXT: feq.h a2, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, t0 -; ZVFHMIN32-NEXT: sb a0, 183(sp) -; ZVFHMIN32-NEXT: lh a0, 748(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 183(sp) +; ZVFHMIN32-NEXT: lh a1, 748(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 492(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, s4 -; ZVFHMIN32-NEXT: feq.h a3, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a7 -; ZVFHMIN32-NEXT: sb a0, 182(sp) -; ZVFHMIN32-NEXT: lh a0, 746(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 182(sp) +; ZVFHMIN32-NEXT: lh a1, 746(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 490(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, s5 -; ZVFHMIN32-NEXT: feq.h a4, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a6 -; ZVFHMIN32-NEXT: sb a0, 181(sp) -; ZVFHMIN32-NEXT: lh a0, 744(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 181(sp) +; ZVFHMIN32-NEXT: lh a1, 744(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: lh a1, 488(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, s3 -; ZVFHMIN32-NEXT: feq.h a6, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a5 -; ZVFHMIN32-NEXT: addi a1, sp, 848 -; ZVFHMIN32-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN32-NEXT: vmv.x.s a1, v8 -; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m2, ta, ma -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 15 -; ZVFHMIN32-NEXT: vmv.x.s a5, v8 -; ZVFHMIN32-NEXT: sb a0, 180(sp) -; ZVFHMIN32-NEXT: lh a0, 742(sp) -; ZVFHMIN32-NEXT: lh a7, 486(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: sb a1, 180(sp) +; ZVFHMIN32-NEXT: lh a1, 742(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN32-NEXT: lh a1, 486(sp) ; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 ; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a7 -; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: sb a0, 179(sp) -; ZVFHMIN32-NEXT: lh a0, 740(sp) -; ZVFHMIN32-NEXT: lh a7, 484(sp) -; ZVFHMIN32-NEXT: sb a2, 140(sp) -; ZVFHMIN32-NEXT: sb t1, 141(sp) -; ZVFHMIN32-NEXT: sb t3, 142(sp) -; ZVFHMIN32-NEXT: sb t4, 143(sp) -; ZVFHMIN32-NEXT: sb a1, 
136(sp) -; ZVFHMIN32-NEXT: sb a6, 137(sp) -; ZVFHMIN32-NEXT: sb a4, 138(sp) -; ZVFHMIN32-NEXT: sb a3, 139(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a7 +; ZVFHMIN32-NEXT: lw a2, 120(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x fa5, a2 +; ZVFHMIN32-NEXT: lw a2, 116(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 +; ZVFHMIN32-NEXT: sb a1, 179(sp) +; ZVFHMIN32-NEXT: lh a2, 740(sp) +; ZVFHMIN32-NEXT: lh a3, 484(sp) +; ZVFHMIN32-NEXT: sb s2, 140(sp) +; ZVFHMIN32-NEXT: sb a6, 141(sp) +; ZVFHMIN32-NEXT: sb a5, 142(sp) +; ZVFHMIN32-NEXT: sb a0, 143(sp) +; ZVFHMIN32-NEXT: sb ra, 136(sp) +; ZVFHMIN32-NEXT: sb s9, 137(sp) +; ZVFHMIN32-NEXT: sb s10, 138(sp) +; ZVFHMIN32-NEXT: sb s11, 139(sp) +; ZVFHMIN32-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN32-NEXT: fmv.h.x fa5, a2 +; ZVFHMIN32-NEXT: fmv.h.x fa4, a3 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN32-NEXT: sb a0, 178(sp) ; ZVFHMIN32-NEXT: lh a0, 638(sp) -; ZVFHMIN32-NEXT: lh a1, 382(sp) -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 14 -; ZVFHMIN32-NEXT: vmv.x.s t3, v8 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 382(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN32-NEXT: sb a0, 255(sp) ; ZVFHMIN32-NEXT: lh a0, 636(sp) -; ZVFHMIN32-NEXT: lh a1, 380(sp) -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 13 -; ZVFHMIN32-NEXT: vmv.x.s t2, v8 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 380(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN32-NEXT: sb a0, 254(sp) ; ZVFHMIN32-NEXT: lh a0, 634(sp) -; ZVFHMIN32-NEXT: lh a1, 378(sp) -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 12 -; ZVFHMIN32-NEXT: vmv.x.s t1, v8 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 378(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN32-NEXT: sb a0, 253(sp) ; ZVFHMIN32-NEXT: lh a0, 632(sp) -; ZVFHMIN32-NEXT: lh a1, 376(sp) -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 11 -; ZVFHMIN32-NEXT: vmv.x.s t0, v8 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 376(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN32-NEXT: sb a0, 252(sp) ; ZVFHMIN32-NEXT: lh a0, 630(sp) -; ZVFHMIN32-NEXT: lh a1, 374(sp) -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 10 -; ZVFHMIN32-NEXT: vmv.x.s a7, v8 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 374(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN32-NEXT: sb a0, 251(sp) ; ZVFHMIN32-NEXT: lh a0, 628(sp) -; ZVFHMIN32-NEXT: lh a1, 372(sp) -; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 9 -; ZVFHMIN32-NEXT: vmv.x.s a6, v8 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 372(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: lw a1, 112(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: sb a0, 250(sp) ; ZVFHMIN32-NEXT: lh a0, 626(sp) -; ZVFHMIN32-NEXT: lh a1, 370(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, a5 -; ZVFHMIN32-NEXT: feq.h a3, fa5, fa4 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 370(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: lw a1, 116(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; 
ZVFHMIN32-NEXT: sb a0, 249(sp) ; ZVFHMIN32-NEXT: lh a0, 624(sp) -; ZVFHMIN32-NEXT: lh a1, 368(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, t3 -; ZVFHMIN32-NEXT: feq.h a2, fa5, fa4 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 368(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: lw a1, 124(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: sb a0, 248(sp) ; ZVFHMIN32-NEXT: lh a0, 622(sp) -; ZVFHMIN32-NEXT: lh a1, 366(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, t2 -; ZVFHMIN32-NEXT: feq.h a4, fa5, fa4 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 366(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: lw a1, 108(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: sb a0, 247(sp) ; ZVFHMIN32-NEXT: lh a0, 620(sp) -; ZVFHMIN32-NEXT: lh a1, 364(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, t1 -; ZVFHMIN32-NEXT: feq.h a5, fa5, fa4 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 364(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: lw a1, 120(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN32-NEXT: sb a0, 246(sp) ; ZVFHMIN32-NEXT: lh a0, 618(sp) -; ZVFHMIN32-NEXT: lh a1, 362(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, t0 -; ZVFHMIN32-NEXT: feq.h t0, fa5, fa4 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 362(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, s2 ; ZVFHMIN32-NEXT: sb a0, 245(sp) ; ZVFHMIN32-NEXT: lh a0, 616(sp) -; ZVFHMIN32-NEXT: lh a1, 360(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, a7 -; ZVFHMIN32-NEXT: feq.h a7, fa5, fa4 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 360(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, t6 ; ZVFHMIN32-NEXT: sb a0, 244(sp) ; ZVFHMIN32-NEXT: lh a0, 614(sp) -; ZVFHMIN32-NEXT: lh a1, 358(sp) -; ZVFHMIN32-NEXT: fmv.h.x fa4, a6 -; ZVFHMIN32-NEXT: feq.h a6, fa5, fa4 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: lh a0, 358(sp) +; ZVFHMIN32-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN32-NEXT: fmv.h.x fa5, t5 +; ZVFHMIN32-NEXT: lw a2, 124(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: fmv.h.x fa5, a2 +; ZVFHMIN32-NEXT: vsetivli zero, 1, e16, m2, ta, ma ; ZVFHMIN32-NEXT: vslidedown.vi v8, v24, 8 -; ZVFHMIN32-NEXT: vmv.x.s a1, v8 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: vmv.x.s a2, v8 +; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 ; ZVFHMIN32-NEXT: sb a0, 243(sp) ; ZVFHMIN32-NEXT: lh a0, 612(sp) -; ZVFHMIN32-NEXT: lh a1, 356(sp) -; ZVFHMIN32-NEXT: sb a5, 204(sp) -; ZVFHMIN32-NEXT: sb a4, 205(sp) -; ZVFHMIN32-NEXT: sb a2, 206(sp) -; ZVFHMIN32-NEXT: sb a3, 207(sp) -; ZVFHMIN32-NEXT: feq.h a2, fa5, fa4 -; ZVFHMIN32-NEXT: sb a2, 200(sp) -; ZVFHMIN32-NEXT: sb a6, 201(sp) -; ZVFHMIN32-NEXT: sb a7, 202(sp) -; ZVFHMIN32-NEXT: sb t0, 203(sp) -; ZVFHMIN32-NEXT: li a2, 128 +; ZVFHMIN32-NEXT: lh a2, 356(sp) +; ZVFHMIN32-NEXT: sb s6, 204(sp) +; ZVFHMIN32-NEXT: sb s8, 205(sp) +; ZVFHMIN32-NEXT: sb s7, 206(sp) +; ZVFHMIN32-NEXT: sb s3, 207(sp) +; ZVFHMIN32-NEXT: feq.h a3, fa5, fa4 +; ZVFHMIN32-NEXT: sb a3, 200(sp) +; ZVFHMIN32-NEXT: sb a1, 201(sp) +; ZVFHMIN32-NEXT: sb a4, 202(sp) +; 
ZVFHMIN32-NEXT: sb s4, 203(sp) +; ZVFHMIN32-NEXT: li a1, 128 ; ZVFHMIN32-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN32-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN32-NEXT: fmv.h.x fa4, a2 ; ZVFHMIN32-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN32-NEXT: sb a0, 242(sp) ; ZVFHMIN32-NEXT: addi a0, sp, 128 -; ZVFHMIN32-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; ZVFHMIN32-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; ZVFHMIN32-NEXT: vle8.v v8, (a0) ; ZVFHMIN32-NEXT: vand.vi v8, v8, 1 ; ZVFHMIN32-NEXT: vmsne.vi v0, v8, 0 -; ZVFHMIN32-NEXT: addi sp, s0, -896 -; ZVFHMIN32-NEXT: .cfi_def_cfa sp, 896 -; ZVFHMIN32-NEXT: lw ra, 892(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: lw s0, 888(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: lw s2, 884(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: lw s3, 880(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: lw s4, 876(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: lw s5, 872(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: lw s6, 868(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: lw s7, 864(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: lw s8, 860(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: lw s9, 856(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: lw s10, 852(sp) # 4-byte Folded Reload -; ZVFHMIN32-NEXT: lw s11, 848(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: addi sp, s0, -1024 +; ZVFHMIN32-NEXT: .cfi_def_cfa sp, 1024 +; ZVFHMIN32-NEXT: lw ra, 1020(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: lw s0, 1016(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: lw s2, 1012(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: lw s3, 1008(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: lw s4, 1004(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: lw s5, 1000(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: lw s6, 996(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: lw s7, 992(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: lw s8, 988(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: lw s9, 984(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: lw s10, 980(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: lw s11, 976(sp) # 4-byte Folded Reload +; ZVFHMIN32-NEXT: fld fs0, 968(sp) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fld fs1, 960(sp) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fld fs2, 952(sp) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fld fs3, 944(sp) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fld fs4, 936(sp) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fld fs5, 928(sp) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fld fs6, 920(sp) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fld fs7, 912(sp) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fld fs8, 904(sp) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fld fs9, 896(sp) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fld fs10, 888(sp) # 8-byte Folded Reload +; ZVFHMIN32-NEXT: fld fs11, 880(sp) # 8-byte Folded Reload ; ZVFHMIN32-NEXT: .cfi_restore ra ; ZVFHMIN32-NEXT: .cfi_restore s0 ; ZVFHMIN32-NEXT: .cfi_restore s2 @@ -2242,26 +2333,50 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 ; ZVFHMIN32-NEXT: .cfi_restore s9 ; ZVFHMIN32-NEXT: .cfi_restore s10 ; ZVFHMIN32-NEXT: .cfi_restore s11 -; ZVFHMIN32-NEXT: addi sp, sp, 896 +; ZVFHMIN32-NEXT: .cfi_restore fs0 +; ZVFHMIN32-NEXT: .cfi_restore fs1 +; ZVFHMIN32-NEXT: .cfi_restore fs2 +; ZVFHMIN32-NEXT: .cfi_restore fs3 +; ZVFHMIN32-NEXT: .cfi_restore fs4 +; ZVFHMIN32-NEXT: .cfi_restore fs5 +; ZVFHMIN32-NEXT: .cfi_restore fs6 +; ZVFHMIN32-NEXT: .cfi_restore fs7 +; ZVFHMIN32-NEXT: .cfi_restore fs8 +; ZVFHMIN32-NEXT: .cfi_restore fs9 +; ZVFHMIN32-NEXT: .cfi_restore fs10 +; ZVFHMIN32-NEXT: .cfi_restore fs11 +; ZVFHMIN32-NEXT: addi sp, sp, 1024 ; 
ZVFHMIN32-NEXT: .cfi_def_cfa_offset 0 ; ZVFHMIN32-NEXT: ret ; ; ZVFHMIN64-LABEL: fcmp_oeq_vv_v128f16: ; ZVFHMIN64: # %bb.0: -; ZVFHMIN64-NEXT: addi sp, sp, -896 -; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 896 -; ZVFHMIN64-NEXT: sd ra, 888(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: sd s0, 880(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: sd s2, 872(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: sd s3, 864(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: sd s4, 856(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: sd s5, 848(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: sd s6, 840(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: sd s7, 832(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: sd s8, 824(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: sd s9, 816(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: sd s10, 808(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: sd s11, 800(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: addi sp, sp, -1024 +; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 1024 +; ZVFHMIN64-NEXT: sd ra, 1016(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: sd s0, 1008(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: sd s2, 1000(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: sd s3, 992(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: sd s4, 984(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: sd s5, 976(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: sd s6, 968(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: sd s7, 960(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: sd s8, 952(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: sd s9, 944(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: sd s10, 936(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: sd s11, 928(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fsd fs0, 920(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fsd fs1, 912(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fsd fs2, 904(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fsd fs3, 896(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fsd fs4, 888(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fsd fs5, 880(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fsd fs6, 872(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fsd fs7, 864(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fsd fs8, 856(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fsd fs9, 848(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fsd fs10, 840(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fsd fs11, 832(sp) # 8-byte Folded Spill ; ZVFHMIN64-NEXT: .cfi_offset ra, -8 ; ZVFHMIN64-NEXT: .cfi_offset s0, -16 ; ZVFHMIN64-NEXT: .cfi_offset s2, -24 @@ -2274,1096 +2389,1175 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 ; ZVFHMIN64-NEXT: .cfi_offset s9, -80 ; ZVFHMIN64-NEXT: .cfi_offset s10, -88 ; ZVFHMIN64-NEXT: .cfi_offset s11, -96 -; ZVFHMIN64-NEXT: addi s0, sp, 896 +; ZVFHMIN64-NEXT: .cfi_offset fs0, -104 +; ZVFHMIN64-NEXT: .cfi_offset fs1, -112 +; ZVFHMIN64-NEXT: .cfi_offset fs2, -120 +; ZVFHMIN64-NEXT: .cfi_offset fs3, -128 +; ZVFHMIN64-NEXT: .cfi_offset fs4, -136 +; ZVFHMIN64-NEXT: .cfi_offset fs5, -144 +; ZVFHMIN64-NEXT: .cfi_offset fs6, -152 +; ZVFHMIN64-NEXT: .cfi_offset fs7, -160 +; ZVFHMIN64-NEXT: .cfi_offset fs8, -168 +; ZVFHMIN64-NEXT: .cfi_offset fs9, -176 +; ZVFHMIN64-NEXT: .cfi_offset fs10, -184 +; ZVFHMIN64-NEXT: .cfi_offset fs11, -192 +; ZVFHMIN64-NEXT: addi s0, sp, 1024 ; ZVFHMIN64-NEXT: .cfi_def_cfa s0, 0 ; ZVFHMIN64-NEXT: csrr a1, vlenb -; ZVFHMIN64-NEXT: li a2, 30 +; ZVFHMIN64-NEXT: li a2, 41 ; ZVFHMIN64-NEXT: mul a1, a1, a2 ; ZVFHMIN64-NEXT: sub sp, sp, a1 ; ZVFHMIN64-NEXT: andi sp, sp, -128 -; ZVFHMIN64-NEXT: addi a1, a0, 128 -; ZVFHMIN64-NEXT: li a2, 64 -; ZVFHMIN64-NEXT: 
addi a3, sp, 640 -; ZVFHMIN64-NEXT: addi a4, sp, 384 -; ZVFHMIN64-NEXT: addi a5, sp, 512 -; ZVFHMIN64-NEXT: vsetvli zero, a2, e16, m8, ta, ma +; ZVFHMIN64-NEXT: addi a3, a0, 128 +; ZVFHMIN64-NEXT: li a1, 64 +; ZVFHMIN64-NEXT: addi a4, sp, 640 +; ZVFHMIN64-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; ZVFHMIN64-NEXT: vmv.x.s a2, v8 +; ZVFHMIN64-NEXT: vle16.v v24, (a3) +; ZVFHMIN64-NEXT: csrr a3, vlenb +; ZVFHMIN64-NEXT: slli a5, a3, 5 +; ZVFHMIN64-NEXT: add a3, a5, a3 +; ZVFHMIN64-NEXT: add a3, sp, a3 +; ZVFHMIN64-NEXT: addi a3, a3, 832 +; ZVFHMIN64-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill ; ZVFHMIN64-NEXT: vle16.v v0, (a0) -; ZVFHMIN64-NEXT: addi a0, sp, 256 -; ZVFHMIN64-NEXT: vle16.v v24, (a1) -; ZVFHMIN64-NEXT: vse16.v v8, (a3) -; ZVFHMIN64-NEXT: vse16.v v0, (a4) -; ZVFHMIN64-NEXT: vse16.v v16, (a5) -; ZVFHMIN64-NEXT: vse16.v v24, (a0) -; ZVFHMIN64-NEXT: lh a0, 704(sp) +; ZVFHMIN64-NEXT: vse16.v v8, (a4) +; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 7 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: slli a0, a0, 5 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 6 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: slli a3, a0, 5 +; ZVFHMIN64-NEXT: sub a0, a3, a0 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 5 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 30 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 4 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 29 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 3 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 28 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 2 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 27 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 1 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 26 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 15 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 24 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 14 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 22 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 13 +; ZVFHMIN64-NEXT: csrr 
a0, vlenb +; ZVFHMIN64-NEXT: li a3, 20 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 12 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 18 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 11 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: slli a3, a0, 3 +; ZVFHMIN64-NEXT: add a0, a3, a0 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 10 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: slli a3, a0, 3 +; ZVFHMIN64-NEXT: sub a0, a3, a0 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 9 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: slli a3, a0, 2 +; ZVFHMIN64-NEXT: add a0, a3, a0 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs2r.v v10, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v8, v8, 8 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: slli a3, a0, 1 +; ZVFHMIN64-NEXT: add a0, a3, a0 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: addi a0, sp, 384 +; ZVFHMIN64-NEXT: addi a3, sp, 512 +; ZVFHMIN64-NEXT: vmv.x.s a5, v16 +; ZVFHMIN64-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; ZVFHMIN64-NEXT: vse16.v v0, (a0) +; ZVFHMIN64-NEXT: vse16.v v16, (a3) +; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 7 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 11 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 6 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 12 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 5 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 13 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 4 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 14 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 3 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: slli a3, a0, 4 +; ZVFHMIN64-NEXT: sub a0, a3, a0 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 2 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: slli a0, a0, 4 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 1 
+; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: slli a3, a0, 4 +; ZVFHMIN64-NEXT: add a0, a3, a0 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 15 +; ZVFHMIN64-NEXT: addi a0, sp, 832 +; ZVFHMIN64-NEXT: vs2r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vslidedown.vi v4, v16, 14 +; ZVFHMIN64-NEXT: vslidedown.vi v2, v16, 13 +; ZVFHMIN64-NEXT: vslidedown.vi v24, v16, 12 +; ZVFHMIN64-NEXT: vslidedown.vi v22, v16, 11 +; ZVFHMIN64-NEXT: vslidedown.vi v20, v16, 10 +; ZVFHMIN64-NEXT: vslidedown.vi v18, v16, 9 +; ZVFHMIN64-NEXT: vslidedown.vi v16, v16, 8 +; ZVFHMIN64-NEXT: vmv.x.s a6, v0 +; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN64-NEXT: vslidedown.vi v5, v0, 7 +; ZVFHMIN64-NEXT: vslidedown.vi v17, v0, 6 +; ZVFHMIN64-NEXT: vslidedown.vi v23, v0, 5 +; ZVFHMIN64-NEXT: vslidedown.vi v19, v0, 4 +; ZVFHMIN64-NEXT: vslidedown.vi v21, v0, 3 +; ZVFHMIN64-NEXT: vslidedown.vi v3, v0, 2 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v0, 1 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: slli a0, a0, 1 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZVFHMIN64-NEXT: vslidedown.vi v8, v0, 15 +; ZVFHMIN64-NEXT: vslidedown.vi v10, v0, 14 +; ZVFHMIN64-NEXT: vslidedown.vi v12, v0, 13 +; ZVFHMIN64-NEXT: vslidedown.vi v14, v0, 12 +; ZVFHMIN64-NEXT: vslidedown.vi v26, v0, 11 +; ZVFHMIN64-NEXT: vslidedown.vi v28, v0, 10 +; ZVFHMIN64-NEXT: vslidedown.vi v30, v0, 9 +; ZVFHMIN64-NEXT: vslidedown.vi v0, v0, 8 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: li a3, 24 +; ZVFHMIN64-NEXT: mul a0, a0, a3 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vl2r.v v6, (a0) # Unknown-size Folded Reload +; ZVFHMIN64-NEXT: vmv.x.s a0, v6 +; ZVFHMIN64-NEXT: csrr a3, vlenb +; ZVFHMIN64-NEXT: li a4, 22 +; ZVFHMIN64-NEXT: mul a3, a3, a4 +; ZVFHMIN64-NEXT: add a3, sp, a3 +; ZVFHMIN64-NEXT: addi a3, a3, 832 +; ZVFHMIN64-NEXT: vl2r.v v6, (a3) # Unknown-size Folded Reload +; ZVFHMIN64-NEXT: vmv.x.s a3, v6 +; ZVFHMIN64-NEXT: csrr a4, vlenb +; ZVFHMIN64-NEXT: li a7, 20 +; ZVFHMIN64-NEXT: mul a4, a4, a7 +; ZVFHMIN64-NEXT: add a4, sp, a4 +; ZVFHMIN64-NEXT: addi a4, a4, 832 +; ZVFHMIN64-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN64-NEXT: vmv.x.s a7, v6 +; ZVFHMIN64-NEXT: csrr a4, vlenb +; ZVFHMIN64-NEXT: li t0, 18 +; ZVFHMIN64-NEXT: mul a4, a4, t0 +; ZVFHMIN64-NEXT: add a4, sp, a4 +; ZVFHMIN64-NEXT: addi a4, a4, 832 +; ZVFHMIN64-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN64-NEXT: vmv.x.s s3, v6 +; ZVFHMIN64-NEXT: csrr a4, vlenb +; ZVFHMIN64-NEXT: slli t0, a4, 3 +; ZVFHMIN64-NEXT: add a4, t0, a4 +; ZVFHMIN64-NEXT: add a4, sp, a4 +; ZVFHMIN64-NEXT: addi a4, a4, 832 +; ZVFHMIN64-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN64-NEXT: vmv.x.s s10, v6 +; ZVFHMIN64-NEXT: csrr a4, vlenb +; ZVFHMIN64-NEXT: slli t0, a4, 3 +; ZVFHMIN64-NEXT: sub a4, t0, a4 +; ZVFHMIN64-NEXT: add a4, sp, a4 +; ZVFHMIN64-NEXT: addi a4, a4, 832 +; ZVFHMIN64-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN64-NEXT: vmv.x.s s11, v6 +; ZVFHMIN64-NEXT: csrr a4, vlenb +; ZVFHMIN64-NEXT: slli t0, a4, 2 +; ZVFHMIN64-NEXT: add a4, t0, a4 +; ZVFHMIN64-NEXT: add a4, sp, a4 +; ZVFHMIN64-NEXT: addi a4, a4, 832 +; ZVFHMIN64-NEXT: vl2r.v 
v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN64-NEXT: vmv.x.s s5, v6 +; ZVFHMIN64-NEXT: csrr a4, vlenb +; ZVFHMIN64-NEXT: slli t0, a4, 1 +; ZVFHMIN64-NEXT: add a4, t0, a4 +; ZVFHMIN64-NEXT: add a4, sp, a4 +; ZVFHMIN64-NEXT: addi a4, a4, 832 +; ZVFHMIN64-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN64-NEXT: vmv.x.s s7, v6 +; ZVFHMIN64-NEXT: addi a4, sp, 832 +; ZVFHMIN64-NEXT: vl2r.v v6, (a4) # Unknown-size Folded Reload +; ZVFHMIN64-NEXT: vmv.x.s s9, v6 +; ZVFHMIN64-NEXT: vmv.x.s s8, v4 +; ZVFHMIN64-NEXT: vmv.x.s s6, v2 +; ZVFHMIN64-NEXT: vmv.x.s s4, v24 +; ZVFHMIN64-NEXT: vmv.x.s s2, v22 +; ZVFHMIN64-NEXT: vmv.x.s a4, v20 +; ZVFHMIN64-NEXT: vmv.x.s t0, v18 +; ZVFHMIN64-NEXT: sd t0, 112(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: vmv.x.s t0, v16 +; ZVFHMIN64-NEXT: sd t0, 120(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: vmv.x.s t6, v8 +; ZVFHMIN64-NEXT: vmv.x.s t0, v10 +; ZVFHMIN64-NEXT: vmv.x.s t1, v12 +; ZVFHMIN64-NEXT: vmv.x.s t2, v14 +; ZVFHMIN64-NEXT: vmv.x.s t3, v26 +; ZVFHMIN64-NEXT: vmv.x.s t4, v28 +; ZVFHMIN64-NEXT: vmv.x.s t5, v30 +; ZVFHMIN64-NEXT: fmv.h.x fs8, a2 +; ZVFHMIN64-NEXT: fmv.h.x fs7, a5 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: slli a2, a2, 5 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x fs6, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: slli a5, a2, 5 +; ZVFHMIN64-NEXT: sub a2, a5, a2 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x fs5, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: li a5, 30 +; ZVFHMIN64-NEXT: mul a2, a2, a5 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x ft10, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: li a5, 29 +; ZVFHMIN64-NEXT: mul a2, a2, a5 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x ft8, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: li a5, 28 +; ZVFHMIN64-NEXT: mul a2, a2, a5 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x ft2, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: li a5, 27 +; ZVFHMIN64-NEXT: mul a2, a2, a5 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x ft3, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: li a5, 26 +; ZVFHMIN64-NEXT: mul a2, a2, a5 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x ft4, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: li a5, 11 +; ZVFHMIN64-NEXT: mul a2, a2, a5 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x ft5, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: li a5, 12 +; ZVFHMIN64-NEXT: mul a2, a2, a5 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x ft6, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: li a5, 13 +; ZVFHMIN64-NEXT: mul a2, a2, a5 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x fa6, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: li a5, 14 +; ZVFHMIN64-NEXT: mul a2, a2, a5 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; 
ZVFHMIN64-NEXT: fmv.h.x fs0, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: slli a5, a2, 4 +; ZVFHMIN64-NEXT: sub a2, a5, a2 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x fs1, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: slli a2, a2, 4 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x fs2, a2 +; ZVFHMIN64-NEXT: csrr a2, vlenb +; ZVFHMIN64-NEXT: slli a5, a2, 4 +; ZVFHMIN64-NEXT: add a2, a5, a2 +; ZVFHMIN64-NEXT: add a2, sp, a2 +; ZVFHMIN64-NEXT: lh a2, 832(a2) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x fs3, a2 +; ZVFHMIN64-NEXT: addi a2, sp, 256 +; ZVFHMIN64-NEXT: fmv.h.x fs4, a0 +; ZVFHMIN64-NEXT: fmv.h.x ft7, a3 +; ZVFHMIN64-NEXT: fmv.h.x ft11, a7 +; ZVFHMIN64-NEXT: fmv.h.x ft9, s3 +; ZVFHMIN64-NEXT: fmv.h.x fa7, s10 +; ZVFHMIN64-NEXT: fmv.h.x fa5, s11 +; ZVFHMIN64-NEXT: fsh fa5, 102(sp) # 2-byte Folded Spill +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: slli a3, a0, 5 +; ZVFHMIN64-NEXT: add a0, a3, a0 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: addi a0, a0, 832 +; ZVFHMIN64-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFHMIN64-NEXT: vsetvli zero, a1, e16, m8, ta, ma +; ZVFHMIN64-NEXT: vse16.v v24, (a2) +; ZVFHMIN64-NEXT: vmv.x.s a3, v0 +; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m2, ta, ma +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 15 +; ZVFHMIN64-NEXT: vmv.x.s a5, v8 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 14 +; ZVFHMIN64-NEXT: vmv.x.s ra, v8 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 13 +; ZVFHMIN64-NEXT: vmv.x.s a2, v8 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 12 +; ZVFHMIN64-NEXT: vmv.x.s a1, v8 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 11 +; ZVFHMIN64-NEXT: vmv.x.s s3, v8 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 10 +; ZVFHMIN64-NEXT: vmv.x.s a7, v8 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 9 +; ZVFHMIN64-NEXT: vmv.x.s a0, v8 +; ZVFHMIN64-NEXT: sd a0, 104(sp) # 8-byte Folded Spill +; ZVFHMIN64-NEXT: fmv.h.x fa3, s5 +; ZVFHMIN64-NEXT: vmv.x.s s5, v5 +; ZVFHMIN64-NEXT: fmv.h.x fa2, s7 +; ZVFHMIN64-NEXT: vmv.x.s s7, v17 +; ZVFHMIN64-NEXT: fmv.h.x fa1, s9 +; ZVFHMIN64-NEXT: vmv.x.s s9, v23 +; ZVFHMIN64-NEXT: fmv.h.x fa0, s8 +; ZVFHMIN64-NEXT: vmv.x.s s8, v19 +; ZVFHMIN64-NEXT: fmv.h.x ft0, s6 +; ZVFHMIN64-NEXT: vmv.x.s s6, v21 +; ZVFHMIN64-NEXT: fmv.h.x ft1, s4 +; ZVFHMIN64-NEXT: vmv.x.s s10, v3 +; ZVFHMIN64-NEXT: fmv.h.x fa5, s2 +; ZVFHMIN64-NEXT: fsh fa5, 100(sp) # 2-byte Folded Spill +; ZVFHMIN64-NEXT: vmv.x.s s2, v24 +; ZVFHMIN64-NEXT: fmv.h.x fs9, a6 +; ZVFHMIN64-NEXT: csrr a0, vlenb +; ZVFHMIN64-NEXT: slli a0, a0, 1 +; ZVFHMIN64-NEXT: add a0, sp, a0 +; ZVFHMIN64-NEXT: lh a6, 832(a0) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 7 +; ZVFHMIN64-NEXT: fmv.h.x fs10, s2 +; ZVFHMIN64-NEXT: vmv.x.s a0, v8 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 6 +; ZVFHMIN64-NEXT: fmv.h.x fs11, s5 +; ZVFHMIN64-NEXT: feq.h s2, fs8, fs9 +; ZVFHMIN64-NEXT: fmv.h.x fs8, s7 +; ZVFHMIN64-NEXT: vmv.x.s s7, v8 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 5 +; ZVFHMIN64-NEXT: fmv.h.x fs9, s9 +; ZVFHMIN64-NEXT: feq.h s11, fs7, fs10 +; ZVFHMIN64-NEXT: fmv.h.x fs7, s8 +; ZVFHMIN64-NEXT: vmv.x.s s8, v8 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 4 +; ZVFHMIN64-NEXT: fmv.h.x fs10, s6 +; ZVFHMIN64-NEXT: feq.h s4, fs6, fs11 +; ZVFHMIN64-NEXT: fmv.h.x fs6, s10 +; ZVFHMIN64-NEXT: vmv.x.s s9, v8 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 3 +; ZVFHMIN64-NEXT: fmv.h.x 
fs11, a6 +; ZVFHMIN64-NEXT: feq.h s5, fs5, fs8 +; ZVFHMIN64-NEXT: fmv.h.x fs5, a0 +; ZVFHMIN64-NEXT: vmv.x.s a0, v8 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 2 +; ZVFHMIN64-NEXT: fmv.h.x fs8, s7 +; ZVFHMIN64-NEXT: feq.h s6, ft10, fs9 +; ZVFHMIN64-NEXT: fmv.h.x fs9, s8 +; ZVFHMIN64-NEXT: vmv.x.s a6, v8 +; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 1 +; ZVFHMIN64-NEXT: fmv.h.x fa5, s9 +; ZVFHMIN64-NEXT: feq.h s7, ft8, fs7 +; ZVFHMIN64-NEXT: fmv.h.x fs7, a0 +; ZVFHMIN64-NEXT: vmv.x.s a0, v8 +; ZVFHMIN64-NEXT: fmv.h.x fa4, a6 +; ZVFHMIN64-NEXT: feq.h s8, ft2, fs10 +; ZVFHMIN64-NEXT: fmv.h.x fs10, a0 +; ZVFHMIN64-NEXT: feq.h s9, ft3, fs6 +; ZVFHMIN64-NEXT: fmv.h.x fs6, t6 +; ZVFHMIN64-NEXT: feq.h s10, ft4, fs11 +; ZVFHMIN64-NEXT: fmv.h.x fs11, t0 +; ZVFHMIN64-NEXT: feq.h t0, ft5, fs5 +; ZVFHMIN64-NEXT: fmv.h.x fs5, t1 +; ZVFHMIN64-NEXT: feq.h t1, ft6, fs8 +; ZVFHMIN64-NEXT: fmv.h.x ft10, t2 +; ZVFHMIN64-NEXT: feq.h t2, fa6, fs9 +; ZVFHMIN64-NEXT: fmv.h.x ft8, t3 +; ZVFHMIN64-NEXT: feq.h t3, fs0, fa5 +; ZVFHMIN64-NEXT: fmv.h.x ft2, t4 +; ZVFHMIN64-NEXT: feq.h t4, fs1, fs7 +; ZVFHMIN64-NEXT: fmv.h.x ft3, t5 +; ZVFHMIN64-NEXT: feq.h t5, fs2, fa4 +; ZVFHMIN64-NEXT: fmv.h.x ft4, a3 +; ZVFHMIN64-NEXT: feq.h t6, fs3, fs10 +; ZVFHMIN64-NEXT: fmv.h.x ft5, a5 +; ZVFHMIN64-NEXT: feq.h a0, fs4, fs6 +; ZVFHMIN64-NEXT: fmv.h.x ft6, ra +; ZVFHMIN64-NEXT: feq.h a5, ft7, fs11 +; ZVFHMIN64-NEXT: fmv.h.x ft7, a2 +; ZVFHMIN64-NEXT: lh a2, 704(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa6, a1 +; ZVFHMIN64-NEXT: feq.h a6, ft11, fs5 +; ZVFHMIN64-NEXT: fmv.h.x fa5, a2 ; ZVFHMIN64-NEXT: lh a1, 448(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 160(sp) -; ZVFHMIN64-NEXT: lh a0, 702(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 160(sp) +; ZVFHMIN64-NEXT: lh a1, 702(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 446(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 159(sp) -; ZVFHMIN64-NEXT: lh a0, 700(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 159(sp) +; ZVFHMIN64-NEXT: lh a1, 700(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 444(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 158(sp) -; ZVFHMIN64-NEXT: lh a0, 698(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 158(sp) +; ZVFHMIN64-NEXT: lh a1, 698(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 442(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 157(sp) -; ZVFHMIN64-NEXT: lh a0, 696(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 157(sp) +; ZVFHMIN64-NEXT: lh a1, 696(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 440(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 156(sp) -; ZVFHMIN64-NEXT: lh a0, 694(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 156(sp) +; ZVFHMIN64-NEXT: lh a1, 694(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 438(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 155(sp) -; ZVFHMIN64-NEXT: lh a0, 692(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; 
ZVFHMIN64-NEXT: sb a1, 155(sp) +; ZVFHMIN64-NEXT: lh a1, 692(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 436(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 154(sp) -; ZVFHMIN64-NEXT: lh a0, 690(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 154(sp) +; ZVFHMIN64-NEXT: lh a1, 690(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 434(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 153(sp) -; ZVFHMIN64-NEXT: lh a0, 688(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 153(sp) +; ZVFHMIN64-NEXT: lh a1, 688(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 432(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 152(sp) -; ZVFHMIN64-NEXT: lh a0, 686(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 152(sp) +; ZVFHMIN64-NEXT: lh a1, 686(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 430(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 151(sp) -; ZVFHMIN64-NEXT: lh a0, 684(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 151(sp) +; ZVFHMIN64-NEXT: lh a1, 684(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 428(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 150(sp) -; ZVFHMIN64-NEXT: lh a0, 682(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 150(sp) +; ZVFHMIN64-NEXT: lh a1, 682(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 426(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 149(sp) -; ZVFHMIN64-NEXT: lh a0, 680(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 149(sp) +; ZVFHMIN64-NEXT: lh a1, 680(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 424(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 148(sp) -; ZVFHMIN64-NEXT: lh a0, 678(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 148(sp) +; ZVFHMIN64-NEXT: lh a1, 678(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 422(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 147(sp) -; ZVFHMIN64-NEXT: lh a0, 676(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 147(sp) +; ZVFHMIN64-NEXT: lh a1, 676(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 420(sp) -; ZVFHMIN64-NEXT: vmv.x.s a2, v8 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 146(sp) -; ZVFHMIN64-NEXT: lh a0, 674(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 146(sp) +; ZVFHMIN64-NEXT: lh a1, 674(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 418(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a2 -; ZVFHMIN64-NEXT: vmv.x.s a2, v0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa4, fa3 -; ZVFHMIN64-NEXT: sb a0, 145(sp) -; ZVFHMIN64-NEXT: lh a0, 672(sp) 
+; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 145(sp) +; ZVFHMIN64-NEXT: lh a1, 672(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 416(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN64-NEXT: feq.h a2, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a2, 128(sp) -; ZVFHMIN64-NEXT: sb a0, 144(sp) -; ZVFHMIN64-NEXT: lh a0, 576(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb s2, 128(sp) +; ZVFHMIN64-NEXT: feq.h s2, ft9, ft10 +; ZVFHMIN64-NEXT: sb a1, 144(sp) +; ZVFHMIN64-NEXT: lh a1, 576(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 320(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 224(sp) -; ZVFHMIN64-NEXT: lh a0, 574(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 224(sp) +; ZVFHMIN64-NEXT: lh a1, 574(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 318(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 223(sp) -; ZVFHMIN64-NEXT: lh a0, 572(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 223(sp) +; ZVFHMIN64-NEXT: lh a1, 572(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 316(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 222(sp) -; ZVFHMIN64-NEXT: lh a0, 570(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 222(sp) +; ZVFHMIN64-NEXT: lh a1, 570(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 314(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 221(sp) -; ZVFHMIN64-NEXT: lh a0, 568(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 221(sp) +; ZVFHMIN64-NEXT: lh a1, 568(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 312(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 220(sp) -; ZVFHMIN64-NEXT: lh a0, 566(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 220(sp) +; ZVFHMIN64-NEXT: lh a1, 566(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 310(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 219(sp) -; ZVFHMIN64-NEXT: lh a0, 564(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 219(sp) +; ZVFHMIN64-NEXT: lh a1, 564(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 308(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 218(sp) -; ZVFHMIN64-NEXT: lh a0, 562(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 218(sp) +; ZVFHMIN64-NEXT: lh a1, 562(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 306(sp) -; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 7 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 29 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 6 
-; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 28 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 5 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 27 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 4 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 26 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 3 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 25 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 2 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 24 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 1 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 23 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs1r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m2, ta, ma -; ZVFHMIN64-NEXT: vslidedown.vi v26, v8, 15 -; ZVFHMIN64-NEXT: vslidedown.vi v20, v8, 14 -; ZVFHMIN64-NEXT: vslidedown.vi v28, v8, 13 -; ZVFHMIN64-NEXT: vslidedown.vi v10, v8, 12 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: slli a2, a2, 1 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs2r.v v10, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v4, v8, 11 -; ZVFHMIN64-NEXT: vslidedown.vi v2, v8, 10 -; ZVFHMIN64-NEXT: vslidedown.vi v30, v8, 9 -; ZVFHMIN64-NEXT: vslidedown.vi v22, v8, 8 -; ZVFHMIN64-NEXT: vmv.x.s a4, v16 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 217(sp) -; ZVFHMIN64-NEXT: lh a0, 560(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 217(sp) +; ZVFHMIN64-NEXT: lh a1, 560(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 304(sp) -; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; ZVFHMIN64-NEXT: vslidedown.vi v3, v16, 7 -; ZVFHMIN64-NEXT: vslidedown.vi v31, v16, 6 -; ZVFHMIN64-NEXT: vslidedown.vi v5, v16, 5 -; ZVFHMIN64-NEXT: vslidedown.vi v23, v16, 4 -; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 3 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 21 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 2 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 20 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 1 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 22 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 
-; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs1r.v v8, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m2, ta, ma -; ZVFHMIN64-NEXT: vslidedown.vi v18, v16, 15 -; ZVFHMIN64-NEXT: vslidedown.vi v14, v16, 14 -; ZVFHMIN64-NEXT: vslidedown.vi v8, v16, 13 -; ZVFHMIN64-NEXT: vslidedown.vi v12, v16, 12 -; ZVFHMIN64-NEXT: vslidedown.vi v10, v16, 11 -; ZVFHMIN64-NEXT: vslidedown.vi v6, v16, 10 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 18 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v6, v16, 9 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 14 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs2r.v v6, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v6, v16, 8 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 216(sp) -; ZVFHMIN64-NEXT: lh a0, 558(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 216(sp) +; ZVFHMIN64-NEXT: lh a1, 558(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 302(sp) -; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; ZVFHMIN64-NEXT: vslidedown.vi v13, v0, 7 -; ZVFHMIN64-NEXT: vslidedown.vi v29, v0, 6 -; ZVFHMIN64-NEXT: vslidedown.vi v11, v0, 5 -; ZVFHMIN64-NEXT: vslidedown.vi v7, v0, 4 -; ZVFHMIN64-NEXT: vslidedown.vi v9, v0, 3 -; ZVFHMIN64-NEXT: vslidedown.vi v21, v0, 2 -; ZVFHMIN64-NEXT: vslidedown.vi v27, v0, 1 -; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m2, ta, ma -; ZVFHMIN64-NEXT: vslidedown.vi v16, v0, 15 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: slli a2, a2, 2 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v16, v0, 14 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: slli a2, a2, 3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v16, v0, 13 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 6 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v16, v0, 12 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 12 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v16, v0, 11 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 10 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v16, v0, 10 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: slli a2, a2, 4 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vs2r.v v16, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vslidedown.vi v16, v0, 9 -; ZVFHMIN64-NEXT: vslidedown.vi v0, v0, 8 -; ZVFHMIN64-NEXT: addi a2, sp, 800 -; ZVFHMIN64-NEXT: vs2r.v v0, (a2) # Unknown-size Folded Spill -; ZVFHMIN64-NEXT: vmv.x.s t4, v26 -; ZVFHMIN64-NEXT: fmv.h.x 
fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 215(sp) -; ZVFHMIN64-NEXT: lh a0, 556(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 215(sp) +; ZVFHMIN64-NEXT: lh a1, 556(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 300(sp) -; ZVFHMIN64-NEXT: vmv.x.s t3, v20 -; ZVFHMIN64-NEXT: vmv.x.s t1, v28 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 214(sp) -; ZVFHMIN64-NEXT: lh a0, 554(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 214(sp) +; ZVFHMIN64-NEXT: lh a1, 554(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 298(sp) -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: slli a2, a2, 1 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vl2r.v v0, (a2) # Unknown-size Folded Reload -; ZVFHMIN64-NEXT: vmv.x.s t2, v0 -; ZVFHMIN64-NEXT: vmv.x.s t0, v4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 213(sp) -; ZVFHMIN64-NEXT: lh a0, 552(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 213(sp) +; ZVFHMIN64-NEXT: lh a1, 552(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 296(sp) -; ZVFHMIN64-NEXT: vmv.x.s a7, v2 -; ZVFHMIN64-NEXT: vmv.x.s a6, v30 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 212(sp) -; ZVFHMIN64-NEXT: lh a0, 550(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 212(sp) +; ZVFHMIN64-NEXT: lh a1, 550(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 294(sp) -; ZVFHMIN64-NEXT: vmv.x.s a5, v22 -; ZVFHMIN64-NEXT: vmv.x.s a2, v18 -; ZVFHMIN64-NEXT: sd a2, 96(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 211(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 211(sp) ; ZVFHMIN64-NEXT: lh a1, 548(sp) -; ZVFHMIN64-NEXT: lh t5, 292(sp) -; ZVFHMIN64-NEXT: vmv.x.s a0, v14 -; ZVFHMIN64-NEXT: sd a0, 104(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: vmv.x.s a0, v8 -; ZVFHMIN64-NEXT: sd a0, 120(sp) # 8-byte Folded Spill ; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 -; ZVFHMIN64-NEXT: fmv.h.x fa4, t5 +; ZVFHMIN64-NEXT: lh a1, 292(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 ; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 ; ZVFHMIN64-NEXT: sb a1, 210(sp) ; ZVFHMIN64-NEXT: lh a1, 546(sp) -; ZVFHMIN64-NEXT: lh t5, 290(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a4 -; ZVFHMIN64-NEXT: vmv.x.s a4, v24 +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 290(sp) ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: fmv.h.x fa3, t5 -; ZVFHMIN64-NEXT: feq.h a1, fa4, fa3 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 ; ZVFHMIN64-NEXT: sb a1, 209(sp) ; ZVFHMIN64-NEXT: lh a1, 544(sp) -; ZVFHMIN64-NEXT: lh t5, 288(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, a4 -; ZVFHMIN64-NEXT: feq.h a4, fa5, fa4 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 -; ZVFHMIN64-NEXT: fmv.h.x fa4, t5 +; ZVFHMIN64-NEXT: lh a1, 288(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 ; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 -; ZVFHMIN64-NEXT: sb a4, 192(sp) +; ZVFHMIN64-NEXT: sb s11, 192(sp) +; ZVFHMIN64-NEXT: feq.h s11, fa7, ft8 ; ZVFHMIN64-NEXT: sb a1, 208(sp) -; ZVFHMIN64-NEXT: lh t5, 738(sp) -; ZVFHMIN64-NEXT: lh t6, 482(sp) -; ZVFHMIN64-NEXT: vmv.x.s a0, v12 -; ZVFHMIN64-NEXT: sd a0, 88(sp) # 
8-byte Folded Spill -; ZVFHMIN64-NEXT: vmv.x.s a0, v10 -; ZVFHMIN64-NEXT: sd a0, 112(sp) # 8-byte Folded Spill -; ZVFHMIN64-NEXT: fmv.h.x fa5, t5 -; ZVFHMIN64-NEXT: fmv.h.x fa4, t6 -; ZVFHMIN64-NEXT: feq.h t5, fa5, fa4 -; ZVFHMIN64-NEXT: sb t5, 177(sp) -; ZVFHMIN64-NEXT: lh t5, 736(sp) -; ZVFHMIN64-NEXT: lh t6, 480(sp) -; ZVFHMIN64-NEXT: csrr a0, vlenb -; ZVFHMIN64-NEXT: li a1, 29 -; ZVFHMIN64-NEXT: mul a0, a0, a1 -; ZVFHMIN64-NEXT: add a0, sp, a0 -; ZVFHMIN64-NEXT: lh s5, 800(a0) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: csrr a0, vlenb -; ZVFHMIN64-NEXT: li a1, 28 -; ZVFHMIN64-NEXT: mul a0, a0, a1 -; ZVFHMIN64-NEXT: add a0, sp, a0 -; ZVFHMIN64-NEXT: lh s6, 800(a0) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: fmv.h.x fa5, t5 -; ZVFHMIN64-NEXT: fmv.h.x fa4, t6 -; ZVFHMIN64-NEXT: feq.h t5, fa5, fa4 -; ZVFHMIN64-NEXT: sb t5, 176(sp) -; ZVFHMIN64-NEXT: lh t5, 734(sp) -; ZVFHMIN64-NEXT: lh t6, 478(sp) -; ZVFHMIN64-NEXT: csrr a0, vlenb -; ZVFHMIN64-NEXT: li a1, 27 -; ZVFHMIN64-NEXT: mul a0, a0, a1 -; ZVFHMIN64-NEXT: add a0, sp, a0 -; ZVFHMIN64-NEXT: lh s7, 800(a0) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: csrr a0, vlenb -; ZVFHMIN64-NEXT: li a1, 26 -; ZVFHMIN64-NEXT: mul a0, a0, a1 -; ZVFHMIN64-NEXT: add a0, sp, a0 -; ZVFHMIN64-NEXT: lh s8, 800(a0) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: fmv.h.x fa5, t5 -; ZVFHMIN64-NEXT: fmv.h.x fa4, t6 -; ZVFHMIN64-NEXT: feq.h t5, fa5, fa4 -; ZVFHMIN64-NEXT: sb t5, 175(sp) -; ZVFHMIN64-NEXT: lh t5, 732(sp) -; ZVFHMIN64-NEXT: lh t6, 476(sp) -; ZVFHMIN64-NEXT: csrr a0, vlenb -; ZVFHMIN64-NEXT: li a1, 25 -; ZVFHMIN64-NEXT: mul a0, a0, a1 -; ZVFHMIN64-NEXT: add a0, sp, a0 -; ZVFHMIN64-NEXT: lh s4, 800(a0) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: csrr a0, vlenb -; ZVFHMIN64-NEXT: li a1, 24 -; ZVFHMIN64-NEXT: mul a0, a0, a1 -; ZVFHMIN64-NEXT: add a0, sp, a0 -; ZVFHMIN64-NEXT: lh s3, 800(a0) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: fmv.h.x fa5, t5 -; ZVFHMIN64-NEXT: fmv.h.x fa4, t6 -; ZVFHMIN64-NEXT: feq.h t5, fa5, fa4 -; ZVFHMIN64-NEXT: sb t5, 174(sp) -; ZVFHMIN64-NEXT: lh t6, 730(sp) -; ZVFHMIN64-NEXT: lh s9, 474(sp) -; ZVFHMIN64-NEXT: csrr a0, vlenb -; ZVFHMIN64-NEXT: li a1, 23 -; ZVFHMIN64-NEXT: mul a0, a0, a1 -; ZVFHMIN64-NEXT: add a0, sp, a0 -; ZVFHMIN64-NEXT: lh s2, 800(a0) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: vmv.x.s t5, v3 -; ZVFHMIN64-NEXT: fmv.h.x fa5, t6 -; ZVFHMIN64-NEXT: fmv.h.x fa4, s9 -; ZVFHMIN64-NEXT: feq.h t6, fa5, fa4 -; ZVFHMIN64-NEXT: sb t6, 173(sp) -; ZVFHMIN64-NEXT: lh s9, 728(sp) -; ZVFHMIN64-NEXT: lh s10, 472(sp) -; ZVFHMIN64-NEXT: vmv.x.s t6, v31 -; ZVFHMIN64-NEXT: vmv.x.s ra, v13 -; ZVFHMIN64-NEXT: fmv.h.x fa5, s9 -; ZVFHMIN64-NEXT: fmv.h.x fa4, s10 -; ZVFHMIN64-NEXT: feq.h s9, fa5, fa4 -; ZVFHMIN64-NEXT: sb s9, 172(sp) -; ZVFHMIN64-NEXT: lh s9, 726(sp) -; ZVFHMIN64-NEXT: lh s10, 470(sp) -; ZVFHMIN64-NEXT: vmv.x.s a2, v29 -; ZVFHMIN64-NEXT: vmv.x.s a3, v11 -; ZVFHMIN64-NEXT: fmv.h.x fa5, s9 -; ZVFHMIN64-NEXT: fmv.h.x fa4, s10 -; ZVFHMIN64-NEXT: feq.h s9, fa5, fa4 -; ZVFHMIN64-NEXT: sb s9, 171(sp) -; ZVFHMIN64-NEXT: lh s10, 724(sp) -; ZVFHMIN64-NEXT: lh s11, 468(sp) -; ZVFHMIN64-NEXT: vmv.x.s a4, v7 -; ZVFHMIN64-NEXT: vmv.x.s s9, v9 -; ZVFHMIN64-NEXT: fmv.h.x fa5, s10 -; ZVFHMIN64-NEXT: fmv.h.x fa4, s11 -; ZVFHMIN64-NEXT: feq.h s10, fa5, fa4 -; ZVFHMIN64-NEXT: sb s10, 170(sp) -; ZVFHMIN64-NEXT: lh a0, 722(sp) +; ZVFHMIN64-NEXT: lh a1, 738(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 482(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 177(sp) +; ZVFHMIN64-NEXT: 
lh a1, 736(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 480(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 176(sp) +; ZVFHMIN64-NEXT: lh a1, 734(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 478(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 175(sp) +; ZVFHMIN64-NEXT: lh a1, 732(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 476(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 174(sp) +; ZVFHMIN64-NEXT: lh a1, 730(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 474(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 173(sp) +; ZVFHMIN64-NEXT: lh a1, 728(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 472(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 172(sp) +; ZVFHMIN64-NEXT: lh a1, 726(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 470(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 171(sp) +; ZVFHMIN64-NEXT: lh a1, 724(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 468(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 170(sp) +; ZVFHMIN64-NEXT: lh a1, 722(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 466(sp) -; ZVFHMIN64-NEXT: vmv.x.s s10, v21 -; ZVFHMIN64-NEXT: vmv.x.s s11, v27 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 169(sp) -; ZVFHMIN64-NEXT: lh a0, 720(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 169(sp) +; ZVFHMIN64-NEXT: lh a1, 720(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 464(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, s5 -; ZVFHMIN64-NEXT: fmv.h.x fa4, s6 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN64-NEXT: sb a0, 168(sp) -; ZVFHMIN64-NEXT: lh a0, 718(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 168(sp) +; ZVFHMIN64-NEXT: lh a1, 718(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 462(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa3, s7 -; ZVFHMIN64-NEXT: fmv.h.x fa2, s8 -; ZVFHMIN64-NEXT: fmv.h.x fa1, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa0, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa1, fa0 -; ZVFHMIN64-NEXT: fmv.h.x fa1, ra -; ZVFHMIN64-NEXT: sb a0, 167(sp) -; ZVFHMIN64-NEXT: lh a0, 716(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa0, a2 +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 167(sp) +; ZVFHMIN64-NEXT: lh a1, 716(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 460(sp) -; ZVFHMIN64-NEXT: feq.h s5, fa5, fa1 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: feq.h a0, fa4, fa0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 ; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, s4 ; ZVFHMIN64-NEXT: sb a1, 166(sp) ; ZVFHMIN64-NEXT: lh a1, 714(sp) -; ZVFHMIN64-NEXT: lh a2, 458(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, a3 -; ZVFHMIN64-NEXT: feq.h a3, fa3, fa4 +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 458(sp) ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a2 -; ZVFHMIN64-NEXT: feq.h a1, fa4, fa3 -; ZVFHMIN64-NEXT: fmv.h.x fa4, s3 
+; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 ; ZVFHMIN64-NEXT: sb a1, 165(sp) ; ZVFHMIN64-NEXT: lh a1, 712(sp) -; ZVFHMIN64-NEXT: lh a2, 456(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa3, a4 -; ZVFHMIN64-NEXT: feq.h a4, fa2, fa3 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a1 -; ZVFHMIN64-NEXT: fmv.h.x fa2, a2 -; ZVFHMIN64-NEXT: feq.h a1, fa3, fa2 -; ZVFHMIN64-NEXT: fmv.h.x fa3, s2 +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 456(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 ; ZVFHMIN64-NEXT: sb a1, 164(sp) ; ZVFHMIN64-NEXT: lh a1, 710(sp) -; ZVFHMIN64-NEXT: lh a2, 454(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa2, s9 -; ZVFHMIN64-NEXT: feq.h s2, fa5, fa2 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 -; ZVFHMIN64-NEXT: fmv.h.x fa2, a2 -; ZVFHMIN64-NEXT: feq.h a1, fa5, fa2 -; ZVFHMIN64-NEXT: fmv.h.x fa5, s10 -; ZVFHMIN64-NEXT: fmv.h.x fa2, s11 +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 454(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 ; ZVFHMIN64-NEXT: sb a1, 163(sp) ; ZVFHMIN64-NEXT: lh a1, 708(sp) -; ZVFHMIN64-NEXT: lh a2, 452(sp) -; ZVFHMIN64-NEXT: feq.h s3, fa4, fa5 -; ZVFHMIN64-NEXT: feq.h s4, fa3, fa2 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 +; ZVFHMIN64-NEXT: lh a1, 452(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 ; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: fmv.h.x fa5, s3 ; ZVFHMIN64-NEXT: sb a1, 162(sp) ; ZVFHMIN64-NEXT: lh a1, 706(sp) ; ZVFHMIN64-NEXT: lh a2, 450(sp) -; ZVFHMIN64-NEXT: sb s4, 129(sp) -; ZVFHMIN64-NEXT: sb s3, 130(sp) -; ZVFHMIN64-NEXT: sb s2, 131(sp) -; ZVFHMIN64-NEXT: sb a4, 132(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 -; ZVFHMIN64-NEXT: sb a3, 133(sp) -; ZVFHMIN64-NEXT: sb a0, 134(sp) -; ZVFHMIN64-NEXT: sb s5, 135(sp) +; ZVFHMIN64-NEXT: sb s10, 129(sp) +; ZVFHMIN64-NEXT: flh fa4, 102(sp) # 2-byte Folded Reload +; ZVFHMIN64-NEXT: feq.h s10, fa4, ft2 +; ZVFHMIN64-NEXT: sb s9, 130(sp) +; ZVFHMIN64-NEXT: feq.h s9, fa3, ft3 +; ZVFHMIN64-NEXT: sb s8, 131(sp) +; ZVFHMIN64-NEXT: feq.h ra, fa2, ft4 +; ZVFHMIN64-NEXT: sb s7, 132(sp) +; ZVFHMIN64-NEXT: feq.h s3, fa1, ft5 +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h s7, fa0, ft6 +; ZVFHMIN64-NEXT: fmv.h.x fa3, a2 +; ZVFHMIN64-NEXT: feq.h s8, ft0, ft7 +; ZVFHMIN64-NEXT: sb s6, 133(sp) +; ZVFHMIN64-NEXT: feq.h s6, ft1, fa6 +; ZVFHMIN64-NEXT: sb s5, 134(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa4, fa3 +; ZVFHMIN64-NEXT: sb s4, 135(sp) +; ZVFHMIN64-NEXT: flh fa4, 100(sp) # 2-byte Folded Reload +; ZVFHMIN64-NEXT: feq.h s4, fa4, fa5 ; ZVFHMIN64-NEXT: sb a1, 161(sp) -; ZVFHMIN64-NEXT: lh a0, 610(sp) +; ZVFHMIN64-NEXT: lh a1, 610(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 354(sp) -; ZVFHMIN64-NEXT: vmv.x.s s6, v5 -; ZVFHMIN64-NEXT: vmv.x.s s5, v23 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 241(sp) -; ZVFHMIN64-NEXT: lh a0, 608(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 241(sp) +; ZVFHMIN64-NEXT: lh a1, 608(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 352(sp) -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 21 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: lh s4, 800(a2) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 20 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: lh s3, 800(a2) # 8-byte Folded Reload 
-; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 240(sp) -; ZVFHMIN64-NEXT: lh a0, 606(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 240(sp) +; ZVFHMIN64-NEXT: lh a1, 606(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 350(sp) -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 22 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: lh s2, 800(a2) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: fmv.h.x fa5, t5 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa4, fa3 -; ZVFHMIN64-NEXT: sb a0, 239(sp) -; ZVFHMIN64-NEXT: lh a0, 604(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 239(sp) +; ZVFHMIN64-NEXT: lh a1, 604(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 348(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, t6 -; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m1, ta, ma -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 7 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN64-NEXT: sb a0, 238(sp) -; ZVFHMIN64-NEXT: lh a0, 602(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 238(sp) +; ZVFHMIN64-NEXT: lh a1, 602(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 346(sp) -; ZVFHMIN64-NEXT: vmv.x.s a2, v8 -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 6 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN64-NEXT: sb a0, 237(sp) -; ZVFHMIN64-NEXT: lh a0, 600(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 237(sp) +; ZVFHMIN64-NEXT: lh a1, 600(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 344(sp) -; ZVFHMIN64-NEXT: vmv.x.s a3, v8 -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 5 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN64-NEXT: sb a0, 236(sp) -; ZVFHMIN64-NEXT: lh a0, 598(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 236(sp) +; ZVFHMIN64-NEXT: lh a1, 598(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 342(sp) -; ZVFHMIN64-NEXT: vmv.x.s a4, v8 -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 4 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN64-NEXT: sb a0, 235(sp) -; ZVFHMIN64-NEXT: lh a0, 596(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 235(sp) +; ZVFHMIN64-NEXT: lh a1, 596(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 340(sp) -; ZVFHMIN64-NEXT: vmv.x.s s8, v8 -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 3 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN64-NEXT: sb a0, 234(sp) -; ZVFHMIN64-NEXT: lh a0, 594(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 234(sp) +; ZVFHMIN64-NEXT: lh a1, 594(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 338(sp) -; ZVFHMIN64-NEXT: vmv.x.s s9, v8 -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 2 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa2, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN64-NEXT: sb a0, 233(sp) -; 
ZVFHMIN64-NEXT: lh a0, 592(sp) -; ZVFHMIN64-NEXT: vmv.x.s a1, v8 -; ZVFHMIN64-NEXT: lh t5, 336(sp) -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 1 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a0 -; ZVFHMIN64-NEXT: vmv.x.s s7, v8 -; ZVFHMIN64-NEXT: fmv.h.x fa2, t5 -; ZVFHMIN64-NEXT: feq.h a0, fa3, fa2 -; ZVFHMIN64-NEXT: fmv.h.x fa3, a2 -; ZVFHMIN64-NEXT: sb a0, 232(sp) -; ZVFHMIN64-NEXT: lh a0, 590(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa2, a3 -; ZVFHMIN64-NEXT: lh a2, 334(sp) -; ZVFHMIN64-NEXT: feq.h t5, fa5, fa3 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: feq.h t6, fa4, fa2 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, s6 -; ZVFHMIN64-NEXT: sb a0, 231(sp) -; ZVFHMIN64-NEXT: lh a0, 588(sp) -; ZVFHMIN64-NEXT: lh a2, 332(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, a4 -; ZVFHMIN64-NEXT: feq.h a3, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, s5 -; ZVFHMIN64-NEXT: sb a0, 230(sp) -; ZVFHMIN64-NEXT: lh a0, 586(sp) -; ZVFHMIN64-NEXT: lh a2, 330(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, s8 -; ZVFHMIN64-NEXT: feq.h a4, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, s4 -; ZVFHMIN64-NEXT: sb a0, 229(sp) -; ZVFHMIN64-NEXT: lh a0, 584(sp) -; ZVFHMIN64-NEXT: lh a2, 328(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, s9 -; ZVFHMIN64-NEXT: feq.h s4, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, s3 -; ZVFHMIN64-NEXT: sb a0, 228(sp) -; ZVFHMIN64-NEXT: lh a0, 582(sp) -; ZVFHMIN64-NEXT: lh a2, 326(sp) ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 ; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, s2 -; ZVFHMIN64-NEXT: sb a0, 227(sp) -; ZVFHMIN64-NEXT: lh a0, 580(sp) -; ZVFHMIN64-NEXT: lh a2, 324(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, s7 -; ZVFHMIN64-NEXT: feq.h s2, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 226(sp) -; ZVFHMIN64-NEXT: lh a0, 578(sp) +; ZVFHMIN64-NEXT: sb a1, 233(sp) +; ZVFHMIN64-NEXT: lh a1, 592(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 336(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 232(sp) +; ZVFHMIN64-NEXT: lh a1, 590(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 334(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 231(sp) +; ZVFHMIN64-NEXT: lh a1, 588(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 332(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 230(sp) +; ZVFHMIN64-NEXT: lh a1, 586(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 330(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 229(sp) +; ZVFHMIN64-NEXT: lh a1, 584(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 328(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 228(sp) +; ZVFHMIN64-NEXT: lh a1, 582(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 326(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, 
fa4 +; ZVFHMIN64-NEXT: sb a1, 227(sp) +; ZVFHMIN64-NEXT: lh a1, 580(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 324(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: fmv.h.x fa5, a4 +; ZVFHMIN64-NEXT: fmv.h.x fa4, a7 +; ZVFHMIN64-NEXT: sb a1, 226(sp) +; ZVFHMIN64-NEXT: lh a1, 578(sp) ; ZVFHMIN64-NEXT: lh a2, 322(sp) -; ZVFHMIN64-NEXT: sb s2, 193(sp) -; ZVFHMIN64-NEXT: sb a1, 194(sp) -; ZVFHMIN64-NEXT: sb s4, 195(sp) -; ZVFHMIN64-NEXT: sb a4, 196(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 +; ZVFHMIN64-NEXT: sb t6, 193(sp) +; ZVFHMIN64-NEXT: sb t5, 194(sp) +; ZVFHMIN64-NEXT: sb t4, 195(sp) +; ZVFHMIN64-NEXT: sb t3, 196(sp) +; ZVFHMIN64-NEXT: feq.h a4, fa5, fa4 +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a3, 197(sp) -; ZVFHMIN64-NEXT: sb t6, 198(sp) -; ZVFHMIN64-NEXT: sb t5, 199(sp) -; ZVFHMIN64-NEXT: sb a0, 225(sp) -; ZVFHMIN64-NEXT: lh a0, 766(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb t2, 197(sp) +; ZVFHMIN64-NEXT: sb t1, 198(sp) +; ZVFHMIN64-NEXT: sb t0, 199(sp) +; ZVFHMIN64-NEXT: sb a1, 225(sp) +; ZVFHMIN64-NEXT: lh a1, 766(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 510(sp) -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 18 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN64-NEXT: vmv.x.s s2, v8 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: li a3, 14 -; ZVFHMIN64-NEXT: mul a2, a2, a3 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN64-NEXT: vmv.x.s t6, v8 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 191(sp) -; ZVFHMIN64-NEXT: lh a0, 764(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 191(sp) +; ZVFHMIN64-NEXT: lh a1, 764(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 508(sp) -; ZVFHMIN64-NEXT: vmv.x.s t5, v6 -; ZVFHMIN64-NEXT: csrr a2, vlenb -; ZVFHMIN64-NEXT: slli a2, a2, 2 -; ZVFHMIN64-NEXT: add a2, sp, a2 -; ZVFHMIN64-NEXT: addi a2, a2, 800 -; ZVFHMIN64-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload -; ZVFHMIN64-NEXT: vmv.x.s a2, v8 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 190(sp) -; ZVFHMIN64-NEXT: lh a0, 762(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 190(sp) +; ZVFHMIN64-NEXT: lh a1, 762(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 506(sp) -; ZVFHMIN64-NEXT: csrr a3, vlenb -; ZVFHMIN64-NEXT: slli a3, a3, 3 -; ZVFHMIN64-NEXT: add a3, sp, a3 -; ZVFHMIN64-NEXT: addi a3, a3, 800 -; ZVFHMIN64-NEXT: vl2r.v v8, (a3) # Unknown-size Folded Reload -; ZVFHMIN64-NEXT: vmv.x.s a3, v8 -; ZVFHMIN64-NEXT: csrr a4, vlenb -; ZVFHMIN64-NEXT: li s3, 6 -; ZVFHMIN64-NEXT: mul a4, a4, s3 -; ZVFHMIN64-NEXT: add a4, sp, a4 -; ZVFHMIN64-NEXT: addi a4, a4, 800 -; ZVFHMIN64-NEXT: vl2r.v v8, (a4) # Unknown-size Folded Reload -; ZVFHMIN64-NEXT: vmv.x.s a4, v8 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 189(sp) -; ZVFHMIN64-NEXT: lh a0, 760(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 189(sp) +; ZVFHMIN64-NEXT: lh a1, 
760(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 504(sp) -; ZVFHMIN64-NEXT: csrr s3, vlenb -; ZVFHMIN64-NEXT: li s4, 12 -; ZVFHMIN64-NEXT: mul s3, s3, s4 -; ZVFHMIN64-NEXT: add s3, sp, s3 -; ZVFHMIN64-NEXT: addi s3, s3, 800 -; ZVFHMIN64-NEXT: vl2r.v v8, (s3) # Unknown-size Folded Reload -; ZVFHMIN64-NEXT: vmv.x.s s6, v8 -; ZVFHMIN64-NEXT: csrr s3, vlenb -; ZVFHMIN64-NEXT: li s4, 10 -; ZVFHMIN64-NEXT: mul s3, s3, s4 -; ZVFHMIN64-NEXT: add s3, sp, s3 -; ZVFHMIN64-NEXT: addi s3, s3, 800 -; ZVFHMIN64-NEXT: vl2r.v v8, (s3) # Unknown-size Folded Reload -; ZVFHMIN64-NEXT: vmv.x.s s4, v8 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 188(sp) -; ZVFHMIN64-NEXT: lh a0, 758(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 188(sp) +; ZVFHMIN64-NEXT: lh a1, 758(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 502(sp) -; ZVFHMIN64-NEXT: csrr s3, vlenb -; ZVFHMIN64-NEXT: slli s3, s3, 4 -; ZVFHMIN64-NEXT: add s3, sp, s3 -; ZVFHMIN64-NEXT: addi s3, s3, 800 -; ZVFHMIN64-NEXT: vl2r.v v8, (s3) # Unknown-size Folded Reload -; ZVFHMIN64-NEXT: vmv.x.s s5, v8 -; ZVFHMIN64-NEXT: vmv.x.s s3, v16 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, t4 -; ZVFHMIN64-NEXT: sb a0, 187(sp) -; ZVFHMIN64-NEXT: lh a0, 756(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 187(sp) +; ZVFHMIN64-NEXT: lh a1, 756(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 500(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 -; ZVFHMIN64-NEXT: feq.h t4, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, t3 -; ZVFHMIN64-NEXT: sb a0, 186(sp) -; ZVFHMIN64-NEXT: lh a0, 754(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 186(sp) +; ZVFHMIN64-NEXT: lh a1, 754(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 498(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, a3 -; ZVFHMIN64-NEXT: feq.h t3, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, t1 -; ZVFHMIN64-NEXT: sb a0, 185(sp) -; ZVFHMIN64-NEXT: lh a0, 752(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 185(sp) +; ZVFHMIN64-NEXT: lh a1, 752(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 496(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, a4 -; ZVFHMIN64-NEXT: feq.h t1, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, t2 -; ZVFHMIN64-NEXT: sb a0, 184(sp) -; ZVFHMIN64-NEXT: lh a0, 750(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 184(sp) +; ZVFHMIN64-NEXT: lh a1, 750(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 494(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, s6 -; ZVFHMIN64-NEXT: feq.h a2, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, t0 -; ZVFHMIN64-NEXT: sb a0, 183(sp) -; ZVFHMIN64-NEXT: lh a0, 748(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 183(sp) +; ZVFHMIN64-NEXT: lh a1, 748(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 492(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, s4 -; ZVFHMIN64-NEXT: feq.h a3, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; 
ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a7 -; ZVFHMIN64-NEXT: sb a0, 182(sp) -; ZVFHMIN64-NEXT: lh a0, 746(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 182(sp) +; ZVFHMIN64-NEXT: lh a1, 746(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 490(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, s5 -; ZVFHMIN64-NEXT: feq.h a4, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a6 -; ZVFHMIN64-NEXT: sb a0, 181(sp) -; ZVFHMIN64-NEXT: lh a0, 744(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 181(sp) +; ZVFHMIN64-NEXT: lh a1, 744(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: lh a1, 488(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, s3 -; ZVFHMIN64-NEXT: feq.h a6, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a5 -; ZVFHMIN64-NEXT: addi a1, sp, 800 -; ZVFHMIN64-NEXT: vl2r.v v8, (a1) # Unknown-size Folded Reload -; ZVFHMIN64-NEXT: vmv.x.s a1, v8 -; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m2, ta, ma -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 15 -; ZVFHMIN64-NEXT: vmv.x.s a5, v8 -; ZVFHMIN64-NEXT: sb a0, 180(sp) -; ZVFHMIN64-NEXT: lh a0, 742(sp) -; ZVFHMIN64-NEXT: lh a7, 486(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: sb a1, 180(sp) +; ZVFHMIN64-NEXT: lh a1, 742(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 +; ZVFHMIN64-NEXT: lh a1, 486(sp) ; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 ; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a7 -; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: sb a0, 179(sp) -; ZVFHMIN64-NEXT: lh a0, 740(sp) -; ZVFHMIN64-NEXT: lh a7, 484(sp) -; ZVFHMIN64-NEXT: sb a2, 140(sp) -; ZVFHMIN64-NEXT: sb t1, 141(sp) -; ZVFHMIN64-NEXT: sb t3, 142(sp) -; ZVFHMIN64-NEXT: sb t4, 143(sp) -; ZVFHMIN64-NEXT: sb a1, 136(sp) -; ZVFHMIN64-NEXT: sb a6, 137(sp) -; ZVFHMIN64-NEXT: sb a4, 138(sp) -; ZVFHMIN64-NEXT: sb a3, 139(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a7 +; ZVFHMIN64-NEXT: ld a2, 112(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x fa5, a2 +; ZVFHMIN64-NEXT: ld a2, 104(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 +; ZVFHMIN64-NEXT: sb a1, 179(sp) +; ZVFHMIN64-NEXT: lh a2, 740(sp) +; ZVFHMIN64-NEXT: lh a3, 484(sp) +; ZVFHMIN64-NEXT: sb s2, 140(sp) +; ZVFHMIN64-NEXT: sb a6, 141(sp) +; ZVFHMIN64-NEXT: sb a5, 142(sp) +; ZVFHMIN64-NEXT: sb a0, 143(sp) +; ZVFHMIN64-NEXT: sb ra, 136(sp) +; ZVFHMIN64-NEXT: sb s9, 137(sp) +; ZVFHMIN64-NEXT: sb s10, 138(sp) +; ZVFHMIN64-NEXT: sb s11, 139(sp) +; ZVFHMIN64-NEXT: feq.h a1, fa5, fa4 +; ZVFHMIN64-NEXT: fmv.h.x fa5, a2 +; ZVFHMIN64-NEXT: fmv.h.x fa4, a3 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN64-NEXT: sb a0, 178(sp) ; ZVFHMIN64-NEXT: lh a0, 638(sp) -; ZVFHMIN64-NEXT: lh a1, 382(sp) -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 14 -; ZVFHMIN64-NEXT: vmv.x.s t3, v8 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: lh a0, 382(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN64-NEXT: sb a0, 255(sp) ; ZVFHMIN64-NEXT: lh a0, 636(sp) -; ZVFHMIN64-NEXT: lh a1, 380(sp) -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 13 -; ZVFHMIN64-NEXT: vmv.x.s t2, v8 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: lh a0, 380(sp) +; ZVFHMIN64-NEXT: fmv.h.x 
fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN64-NEXT: sb a0, 254(sp) ; ZVFHMIN64-NEXT: lh a0, 634(sp) -; ZVFHMIN64-NEXT: lh a1, 378(sp) -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 12 -; ZVFHMIN64-NEXT: vmv.x.s t1, v8 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: lh a0, 378(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN64-NEXT: sb a0, 253(sp) ; ZVFHMIN64-NEXT: lh a0, 632(sp) -; ZVFHMIN64-NEXT: lh a1, 376(sp) -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 11 -; ZVFHMIN64-NEXT: vmv.x.s t0, v8 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: lh a0, 376(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN64-NEXT: sb a0, 252(sp) ; ZVFHMIN64-NEXT: lh a0, 630(sp) -; ZVFHMIN64-NEXT: lh a1, 374(sp) -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 10 -; ZVFHMIN64-NEXT: vmv.x.s a7, v8 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: lh a0, 374(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN64-NEXT: sb a0, 251(sp) ; ZVFHMIN64-NEXT: lh a0, 628(sp) -; ZVFHMIN64-NEXT: lh a1, 372(sp) -; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 9 -; ZVFHMIN64-NEXT: vmv.x.s a6, v8 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: lh a0, 372(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: ld a1, 96(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: sb a0, 250(sp) ; ZVFHMIN64-NEXT: lh a0, 626(sp) -; ZVFHMIN64-NEXT: lh a1, 370(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, a5 -; ZVFHMIN64-NEXT: feq.h a3, fa5, fa4 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: lh a0, 370(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: ld a1, 104(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: sb a0, 249(sp) ; ZVFHMIN64-NEXT: lh a0, 624(sp) -; ZVFHMIN64-NEXT: lh a1, 368(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, t3 -; ZVFHMIN64-NEXT: feq.h a2, fa5, fa4 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: lh a0, 368(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: ld a1, 120(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: sb a0, 248(sp) ; ZVFHMIN64-NEXT: lh a0, 622(sp) -; ZVFHMIN64-NEXT: lh a1, 366(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, t2 -; ZVFHMIN64-NEXT: feq.h a4, fa5, fa4 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: lh a0, 366(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: ld a1, 88(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: sb a0, 247(sp) ; ZVFHMIN64-NEXT: lh a0, 620(sp) -; ZVFHMIN64-NEXT: lh a1, 364(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, t1 -; ZVFHMIN64-NEXT: feq.h a5, fa5, fa4 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: lh a0, 364(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: ld a1, 112(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: fmv.h.x fa5, a1 ; ZVFHMIN64-NEXT: sb a0, 246(sp) ; ZVFHMIN64-NEXT: lh a0, 618(sp) -; ZVFHMIN64-NEXT: lh a1, 362(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, t0 -; ZVFHMIN64-NEXT: feq.h t0, fa5, fa4 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; 
ZVFHMIN64-NEXT: lh a0, 362(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, s2 ; ZVFHMIN64-NEXT: sb a0, 245(sp) ; ZVFHMIN64-NEXT: lh a0, 616(sp) -; ZVFHMIN64-NEXT: lh a1, 360(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, a7 -; ZVFHMIN64-NEXT: feq.h a7, fa5, fa4 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: lh a0, 360(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, t6 ; ZVFHMIN64-NEXT: sb a0, 244(sp) ; ZVFHMIN64-NEXT: lh a0, 614(sp) -; ZVFHMIN64-NEXT: lh a1, 358(sp) -; ZVFHMIN64-NEXT: fmv.h.x fa4, a6 -; ZVFHMIN64-NEXT: feq.h a6, fa5, fa4 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: lh a0, 358(sp) +; ZVFHMIN64-NEXT: fmv.h.x fa4, a0 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 -; ZVFHMIN64-NEXT: fmv.h.x fa5, t5 +; ZVFHMIN64-NEXT: ld a2, 120(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fmv.h.x fa5, a2 +; ZVFHMIN64-NEXT: vsetivli zero, 1, e16, m2, ta, ma ; ZVFHMIN64-NEXT: vslidedown.vi v8, v24, 8 -; ZVFHMIN64-NEXT: vmv.x.s a1, v8 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: vmv.x.s a2, v8 +; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 ; ZVFHMIN64-NEXT: sb a0, 243(sp) ; ZVFHMIN64-NEXT: lh a0, 612(sp) -; ZVFHMIN64-NEXT: lh a1, 356(sp) -; ZVFHMIN64-NEXT: sb a5, 204(sp) -; ZVFHMIN64-NEXT: sb a4, 205(sp) -; ZVFHMIN64-NEXT: sb a2, 206(sp) -; ZVFHMIN64-NEXT: sb a3, 207(sp) -; ZVFHMIN64-NEXT: feq.h a2, fa5, fa4 -; ZVFHMIN64-NEXT: sb a2, 200(sp) -; ZVFHMIN64-NEXT: sb a6, 201(sp) -; ZVFHMIN64-NEXT: sb a7, 202(sp) -; ZVFHMIN64-NEXT: sb t0, 203(sp) -; ZVFHMIN64-NEXT: li a2, 128 +; ZVFHMIN64-NEXT: lh a2, 356(sp) +; ZVFHMIN64-NEXT: sb s6, 204(sp) +; ZVFHMIN64-NEXT: sb s8, 205(sp) +; ZVFHMIN64-NEXT: sb s7, 206(sp) +; ZVFHMIN64-NEXT: sb s3, 207(sp) +; ZVFHMIN64-NEXT: feq.h a3, fa5, fa4 +; ZVFHMIN64-NEXT: sb a3, 200(sp) +; ZVFHMIN64-NEXT: sb a1, 201(sp) +; ZVFHMIN64-NEXT: sb a4, 202(sp) +; ZVFHMIN64-NEXT: sb s4, 203(sp) +; ZVFHMIN64-NEXT: li a1, 128 ; ZVFHMIN64-NEXT: fmv.h.x fa5, a0 -; ZVFHMIN64-NEXT: fmv.h.x fa4, a1 +; ZVFHMIN64-NEXT: fmv.h.x fa4, a2 ; ZVFHMIN64-NEXT: feq.h a0, fa5, fa4 ; ZVFHMIN64-NEXT: sb a0, 242(sp) ; ZVFHMIN64-NEXT: addi a0, sp, 128 -; ZVFHMIN64-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; ZVFHMIN64-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; ZVFHMIN64-NEXT: vle8.v v8, (a0) ; ZVFHMIN64-NEXT: vand.vi v8, v8, 1 ; ZVFHMIN64-NEXT: vmsne.vi v0, v8, 0 -; ZVFHMIN64-NEXT: addi sp, s0, -896 -; ZVFHMIN64-NEXT: .cfi_def_cfa sp, 896 -; ZVFHMIN64-NEXT: ld ra, 888(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: ld s0, 880(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: ld s2, 872(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: ld s3, 864(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: ld s4, 856(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: ld s5, 848(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: ld s6, 840(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: ld s7, 832(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: ld s8, 824(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: ld s9, 816(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: ld s10, 808(sp) # 8-byte Folded Reload -; ZVFHMIN64-NEXT: ld s11, 800(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: addi sp, s0, -1024 +; ZVFHMIN64-NEXT: .cfi_def_cfa sp, 1024 +; ZVFHMIN64-NEXT: ld ra, 1016(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: ld s0, 1008(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: ld s2, 1000(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: ld s3, 992(sp) # 8-byte Folded Reload +; 
ZVFHMIN64-NEXT: ld s4, 984(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: ld s5, 976(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: ld s6, 968(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: ld s7, 960(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: ld s8, 952(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: ld s9, 944(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: ld s10, 936(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: ld s11, 928(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fld fs0, 920(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fld fs1, 912(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fld fs2, 904(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fld fs3, 896(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fld fs4, 888(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fld fs5, 880(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fld fs6, 872(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fld fs7, 864(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fld fs8, 856(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fld fs9, 848(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fld fs10, 840(sp) # 8-byte Folded Reload +; ZVFHMIN64-NEXT: fld fs11, 832(sp) # 8-byte Folded Reload ; ZVFHMIN64-NEXT: .cfi_restore ra ; ZVFHMIN64-NEXT: .cfi_restore s0 ; ZVFHMIN64-NEXT: .cfi_restore s2 @@ -3376,7 +3570,19 @@ define <128 x i1> @fcmp_oeq_vv_v128f16(<128 x half> %va, <128 x half> %vb, <128 ; ZVFHMIN64-NEXT: .cfi_restore s9 ; ZVFHMIN64-NEXT: .cfi_restore s10 ; ZVFHMIN64-NEXT: .cfi_restore s11 -; ZVFHMIN64-NEXT: addi sp, sp, 896 +; ZVFHMIN64-NEXT: .cfi_restore fs0 +; ZVFHMIN64-NEXT: .cfi_restore fs1 +; ZVFHMIN64-NEXT: .cfi_restore fs2 +; ZVFHMIN64-NEXT: .cfi_restore fs3 +; ZVFHMIN64-NEXT: .cfi_restore fs4 +; ZVFHMIN64-NEXT: .cfi_restore fs5 +; ZVFHMIN64-NEXT: .cfi_restore fs6 +; ZVFHMIN64-NEXT: .cfi_restore fs7 +; ZVFHMIN64-NEXT: .cfi_restore fs8 +; ZVFHMIN64-NEXT: .cfi_restore fs9 +; ZVFHMIN64-NEXT: .cfi_restore fs10 +; ZVFHMIN64-NEXT: .cfi_restore fs11 +; ZVFHMIN64-NEXT: addi sp, sp, 1024 ; ZVFHMIN64-NEXT: .cfi_def_cfa_offset 0 ; ZVFHMIN64-NEXT: ret %v = call <128 x i1> @llvm.vp.fcmp.v128f16(<128 x half> %va, <128 x half> %vb, metadata !"oeq", <128 x i1> %m, i32 %evl) @@ -3953,20 +4159,20 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a3, 16 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 2 -; CHECK-NEXT: bltu a2, a1, .LBB87_2 +; CHECK-NEXT: bltu a2, a3, .LBB87_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB87_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v7, v8, v16, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 @@ -3977,13 +4183,13 @@ define <32 x i1> @fcmp_oeq_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 x ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload 
; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v8, v16, v24, v0.t +; CHECK-NEXT: vmfeq.vv v16, v8, v24, v0.t ; CHECK-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-NEXT: vslideup.vi v7, v8, 2 +; CHECK-NEXT: vslideup.vi v7, v16, 2 ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll index 69d6ffa9f300c..81b8b2d5a2c88 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-setcc-int-vp.ll @@ -592,55 +592,30 @@ declare <256 x i1> @llvm.vp.icmp.v256i8(<256 x i8>, <256 x i8>, metadata, <256 x define <256 x i1> @icmp_eq_vv_v256i8(<256 x i8> %va, <256 x i8> %vb, <256 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vv_v256i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: li a1, 128 ; CHECK-NEXT: addi a4, a0, 128 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: addi a2, a3, -128 -; CHECK-NEXT: vle8.v v8, (a4) +; CHECK-NEXT: vle8.v v24, (a4) ; CHECK-NEXT: sltu a4, a3, a2 -; CHECK-NEXT: vle8.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a2, a4, a2 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vv v6, v16, v8, v0.t +; CHECK-NEXT: vmseq.vv v6, v16, v24, v0.t +; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma +; CHECK-NEXT: vle8.v v24, (a0) ; CHECK-NEXT: bltu a3, a1, .LBB51_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: .LBB51_2: ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call <256 x i1> @llvm.vp.icmp.v256i8(<256 x i8> %va, <256 x i8> %vb, metadata !"eq", <256 x i1> %m, i32 %evl) ret <256 x i1> %v @@ -652,12 +627,12 @@ define <256 x i1> @icmp_eq_vx_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 z ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: addi a4, a2, -128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) -; CHECK-NEXT: addi a1, a2, -128 -; CHECK-NEXT: sltu a4, a2, a1 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a1, a4, a1 +; 
CHECK-NEXT: sltu a1, a2, a4 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a4 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB52_2 @@ -682,12 +657,12 @@ define <256 x i1> @icmp_eq_vx_swap_v256i8(<256 x i8> %va, i8 %b, <256 x i1> %m, ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: addi a4, a2, -128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) -; CHECK-NEXT: addi a1, a2, -128 -; CHECK-NEXT: sltu a4, a2, a1 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a1, a4, a1 +; CHECK-NEXT: sltu a1, a2, a4 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a4 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vx v25, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB53_2 @@ -1263,19 +1238,19 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: li a3, 32 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a1) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: vle32.v v16, (a0) -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 4 ; CHECK-NEXT: bltu a2, a3, .LBB99_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: .LBB99_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vv v7, v8, v16, v0.t ; CHECK-NEXT: addi a0, a2, -32 ; CHECK-NEXT: sltu a1, a2, a0 @@ -1308,9 +1283,9 @@ define <64 x i1> @icmp_eq_vv_v64i32(<64 x i32> %va, <64 x i32> %vb, <64 x i1> %m define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_v64i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 +; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB100_2 ; CHECK-NEXT: # %bb.1: @@ -1338,9 +1313,9 @@ define <64 x i1> @icmp_eq_vx_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 ze define <64 x i1> @icmp_eq_vx_swap_v64i32(<64 x i32> %va, i32 %b, <64 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vx_swap_v64i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 4 +; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: bltu a1, a3, .LBB101_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll index d1980ee3b0a6f..26477edb33adc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sext-vp.ll @@ -151,9 +151,9 @@ declare <32 x i64> @llvm.vp.sext.v32i64.v32i32(<32 x i32>, <32 x i1>, i32) define <32 x i64> @vsext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsext_v32i64_v32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB12_2 ; CHECK-NEXT: # %bb.1: diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll index e4b6e5c47fd98..d64b39488023b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll @@ -31,8 +31,8 @@ define <8 x i1> @v8i1_v16i1(<16 x i1>) { ; RV32-NEXT: srli a0, a0, 31 ; RV32-NEXT: vslide1down.vx v8, v8, a1 ; RV32-NEXT: vslide1down.vx v9, v9, a2 -; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslide1down.vx v9, v9, a0 +; RV32-NEXT: vmv.v.i v0, 15 ; RV32-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV32-NEXT: vand.vi v8, v8, 1 ; RV32-NEXT: vmsne.vi v0, v8, 0 @@ -65,8 +65,8 @@ define <8 x i1> @v8i1_v16i1(<16 x i1>) { ; RV64-NEXT: srli a0, a0, 63 ; RV64-NEXT: vslide1down.vx v8, v8, a1 ; RV64-NEXT: vslide1down.vx v9, v9, a2 -; RV64-NEXT: vmv.v.i v0, 15 ; RV64-NEXT: vslide1down.vx v9, v9, a0 +; RV64-NEXT: vmv.v.i v0, 15 ; RV64-NEXT: vslidedown.vi v8, v9, 4, v0.t ; RV64-NEXT: vand.vi v8, v8, 1 ; RV64-NEXT: vmsne.vi v0, v8, 0 @@ -80,13 +80,13 @@ define <4 x i32> @v4i32_v8i32(<8 x i32>) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vid.v v10 -; CHECK-NEXT: vmv.v.i v0, 5 ; CHECK-NEXT: vsrl.vi v10, v10, 1 ; CHECK-NEXT: vrsub.vi v11, v10, 3 ; CHECK-NEXT: vrgather.vv v10, v8, v11 ; CHECK-NEXT: vsetivli zero, 4, e32, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 4 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu +; CHECK-NEXT: vmv.v.i v0, 5 ; CHECK-NEXT: vslidedown.vi v10, v8, 1, v0.t ; CHECK-NEXT: vmv.v.v v8, v10 ; CHECK-NEXT: ret @@ -156,15 +156,15 @@ define <4 x i32> @v4i32_v32i32(<32 x i32>) { ; RV32-NEXT: vse32.v v8, (a1) ; RV32-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV32-NEXT: vslidedown.vi v8, v8, 4 -; RV32-NEXT: lw a0, 36(sp) -; RV32-NEXT: vmv.x.s a1, v16 +; RV32-NEXT: vmv.x.s a0, v16 +; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v9, a1 -; RV32-NEXT: lw a1, 120(sp) -; RV32-NEXT: vslide1down.vx v9, v9, a0 -; RV32-NEXT: vmv.x.s a0, v8 -; RV32-NEXT: vslide1down.vx v8, v9, a0 +; RV32-NEXT: vmv.v.x v8, a0 +; RV32-NEXT: lw a0, 36(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: lw a0, 120(sp) +; RV32-NEXT: vslide1down.vx v8, v8, a0 ; RV32-NEXT: addi sp, s0, -256 ; RV32-NEXT: .cfi_def_cfa sp, 256 ; RV32-NEXT: lw ra, 252(sp) # 4-byte Folded Reload @@ -194,15 +194,15 @@ define <4 x i32> @v4i32_v32i32(<32 x i32>) { ; RV64-NEXT: vse32.v v8, (a1) ; RV64-NEXT: vsetivli zero, 1, e32, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 4 -; RV64-NEXT: lw a0, 36(sp) -; RV64-NEXT: vmv.x.s a1, v16 +; RV64-NEXT: vmv.x.s a0, v16 +; RV64-NEXT: vmv.x.s a1, v8 ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64-NEXT: vmv.v.x v9, a1 -; RV64-NEXT: lw a1, 120(sp) -; RV64-NEXT: vslide1down.vx v9, v9, a0 -; RV64-NEXT: vmv.x.s a0, v8 -; RV64-NEXT: vslide1down.vx v8, v9, a0 +; RV64-NEXT: vmv.v.x v8, a0 +; RV64-NEXT: lw a0, 36(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a0 ; RV64-NEXT: vslide1down.vx v8, v8, a1 +; RV64-NEXT: lw a0, 120(sp) +; RV64-NEXT: vslide1down.vx v8, v8, a0 ; RV64-NEXT: addi sp, s0, -256 ; RV64-NEXT: .cfi_def_cfa sp, 256 ; RV64-NEXT: ld ra, 248(sp) # 8-byte Folded Reload @@ -219,13 +219,13 @@ define <4 x i32> @v4i32_v32i32(<32 x i32>) { define <16 x i1> @v16i1_v8i1(<8 x i1>) { ; CHECK-LABEL: v16i1_v8i1: ; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) ; 
CHECK-NEXT: addi a0, a0, %lo(.LCPI4_0) -; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vmerge.vim v9, v9, 1, v0 -; CHECK-NEXT: vrgather.vv v10, v9, v8 +; CHECK-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-NEXT: vle8.v v9, (a0) +; CHECK-NEXT: vrgather.vv v10, v8, v9 ; CHECK-NEXT: vmsne.vi v0, v10, 0 ; CHECK-NEXT: ret %2 = shufflevector <8 x i1> %0, <8 x i1> poison, <16 x i32> @@ -235,12 +235,12 @@ define <16 x i1> @v16i1_v8i1(<8 x i1>) { define <8 x i32> @v8i32_v4i32(<4 x i32>) { ; CHECK-LABEL: v8i32_v4i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0) -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v9, (a0) ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: lui a1, %hi(.LCPI5_0) +; CHECK-NEXT: addi a1, a1, %lo(.LCPI5_0) ; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v9, (a1) ; CHECK-NEXT: vslidedown.vx v10, v9, a0 ; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, ma ; CHECK-NEXT: vrgatherei16.vv v11, v8, v10 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll index f2353e7d028bd..5c2d61138df13 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat.ll @@ -185,9 +185,9 @@ define <32 x i32> @concat_4xv8i32(<8 x i32> %a, <8 x i32> %b, <8 x i32> %c, <8 x ; VLA-NEXT: vmv2r.v v20, v14 ; VLA-NEXT: vmv2r.v v16, v12 ; VLA-NEXT: vmv2r.v v12, v10 -; VLA-NEXT: li a0, 32 ; VLA-NEXT: vslideup.vi v16, v20, 8 ; VLA-NEXT: vslideup.vi v8, v12, 8 +; VLA-NEXT: li a0, 32 ; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; VLA-NEXT: vslideup.vi v8, v16, 16 ; VLA-NEXT: ret @@ -212,7 +212,6 @@ define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x ; VLA-NEXT: vmv1r.v v22, v11 ; VLA-NEXT: vmv1r.v v12, v10 ; VLA-NEXT: vmv1r.v v10, v9 -; VLA-NEXT: li a0, 32 ; VLA-NEXT: vslideup.vi v20, v18, 4 ; VLA-NEXT: vslideup.vi v16, v14, 4 ; VLA-NEXT: vslideup.vi v12, v22, 4 @@ -220,6 +219,7 @@ define <32 x i32> @concat_8xv4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x ; VLA-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; VLA-NEXT: vslideup.vi v16, v20, 8 ; VLA-NEXT: vslideup.vi v8, v12, 8 +; VLA-NEXT: li a0, 32 ; VLA-NEXT: vsetvli zero, a0, e32, m8, ta, ma ; VLA-NEXT: vslideup.vi v8, v16, 16 ; VLA-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll index 10dadbc022e02..140d1450e1e5c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll @@ -11,16 +11,15 @@ define void @deinterleave3_0_i8(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_0) -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: li a0, 73 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI0_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI0_0) ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v8, 8 +; CHECK-NEXT: vslidedown.vi v9, v8, 8 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: 
vle8.v v9, (a0) ; CHECK-NEXT: vrgather.vv v10, v8, v9 ; CHECK-NEXT: vse8.v v10, (a1) ; CHECK-NEXT: ret @@ -36,16 +35,15 @@ define void @deinterleave3_8_i8(ptr %in, ptr %out) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI1_0) -; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: li a0, 146 ; CHECK-NEXT: vmv.s.x v0, a0 +; CHECK-NEXT: lui a0, %hi(.LCPI1_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI1_0) ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v8, 8 +; CHECK-NEXT: vslidedown.vi v9, v8, 8 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v9, v8, v0 +; CHECK-NEXT: vle8.v v9, (a0) ; CHECK-NEXT: vrgather.vv v10, v8, v9 ; CHECK-NEXT: vse8.v v10, (a1) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll index c0c17d4e0623e..0b7a50912b447 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-exact-vlen.ll @@ -186,9 +186,9 @@ define void @shuffle1(ptr %explicit_0, ptr %explicit_1) vscale_range(2,2) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vid.v v10 +; CHECK-NEXT: vmv.v.i v0, 5 ; CHECK-NEXT: vsetivli zero, 3, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v11, (a0) -; CHECK-NEXT: vmv.v.i v0, 5 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu ; CHECK-NEXT: vsrl.vi v10, v10, 1 ; CHECK-NEXT: vadd.vi v10, v10, 1 @@ -210,14 +210,14 @@ define <16 x float> @shuffle2(<4 x float> %a) vscale_range(2,2) { ; CHECK-LABEL: shuffle2: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-NEXT: vmv1r.v v12, v8 -; CHECK-NEXT: vmv.v.i v8, 0 +; CHECK-NEXT: vmv.v.i v12, 0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vid.v v13 -; CHECK-NEXT: vadd.vv v13, v13, v13 +; CHECK-NEXT: vid.v v9 +; CHECK-NEXT: vadd.vv v9, v9, v9 +; CHECK-NEXT: vrsub.vi v9, v9, 4 ; CHECK-NEXT: vmv.v.i v0, 6 -; CHECK-NEXT: vrsub.vi v13, v13, 4 -; CHECK-NEXT: vrgather.vv v9, v12, v13, v0.t +; CHECK-NEXT: vrgather.vv v13, v8, v9, v0.t +; CHECK-NEXT: vmv4r.v v8, v12 ; CHECK-NEXT: ret %b = extractelement <4 x float> %a, i32 2 %c = insertelement <16 x float> , float %b, i32 5 @@ -255,11 +255,10 @@ define i64 @extract_any_extend_vector_inreg_v16i64(<16 x i64> %a0, i32 %a1) vsca ; RV64-NEXT: addi s0, sp, 256 ; RV64-NEXT: .cfi_def_cfa s0, 0 ; RV64-NEXT: andi sp, sp, -128 -; RV64-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; RV64-NEXT: vmv.v.i v0, 1 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vmv.v.i v16, 0 ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, mu +; RV64-NEXT: vmv.v.i v0, 1 ; RV64-NEXT: vrgather.vi v18, v15, 1, v0.t ; RV64-NEXT: mv s2, sp ; RV64-NEXT: vs8r.v v16, (s2) @@ -291,9 +290,9 @@ define <4 x double> @shuffles_add(<4 x double> %0, <4 x double> %1) vscale_range ; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; CHECK-NEXT: vmv1r.v v13, v10 ; CHECK-NEXT: vslideup.vi v13, v11, 1 +; CHECK-NEXT: vrgather.vi v12, v9, 0 ; CHECK-NEXT: vmv1r.v v8, v9 ; CHECK-NEXT: vmv.v.i v0, 1 -; CHECK-NEXT: vrgather.vi v12, v9, 0 ; CHECK-NEXT: vmv1r.v v9, v11 ; CHECK-NEXT: vrgather.vi v9, v10, 1, v0.t ; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma @@ -326,8 +325,8 @@ define <16 x i32> 
@m4_linear_num_of_shuffles_in_chunks(<16 x i32> %0) vscale_ran ; CHECK-LABEL: m4_linear_num_of_shuffles_in_chunks: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, mu -; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vi v12, v10, 0 +; CHECK-NEXT: vmv.v.i v0, 8 ; CHECK-NEXT: vrgather.vi v12, v11, 0, v0.t ; CHECK-NEXT: vrgather.vi v14, v8, 2 ; CHECK-NEXT: vrgather.vi v15, v10, 3 @@ -348,16 +347,18 @@ define i64 @multi_chunks_shuffle(<32 x i32> %0) vscale_range(8,8) { ; RV32-NEXT: vwsubu.vx v12, v10, a0 ; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-NEXT: vmv.v.x v10, a0 -; RV32-NEXT: lui a0, 61681 -; RV32-NEXT: addi a0, a0, -241 ; RV32-NEXT: vand.vx v12, v12, a1 ; RV32-NEXT: vand.vx v10, v10, a1 ; RV32-NEXT: vsrl.vv v12, v8, v12 ; RV32-NEXT: vsll.vv v8, v8, v10 -; RV32-NEXT: vmv.s.x v0, a0 -; RV32-NEXT: vor.vv v8, v8, v12 ; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV32-NEXT: vmv.v.i v10, 0 +; RV32-NEXT: lui a0, 61681 +; RV32-NEXT: addi a0, a0, -241 +; RV32-NEXT: vsetivli zero, 16, e64, m2, ta, ma +; RV32-NEXT: vor.vv v8, v8, v12 +; RV32-NEXT: vmv.s.x v0, a0 +; RV32-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV32-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV32-NEXT: vrgather.vi v10, v8, 2 ; RV32-NEXT: vor.vv v8, v8, v10 @@ -373,12 +374,12 @@ define i64 @multi_chunks_shuffle(<32 x i32> %0) vscale_range(8,8) { ; RV64-NEXT: vsetivli zero, 16, e64, m2, ta, ma ; RV64-NEXT: vsrl.vx v10, v8, a0 ; RV64-NEXT: vsll.vx v8, v8, a0 -; RV64-NEXT: lui a0, 61681 -; RV64-NEXT: addi a0, a0, -241 ; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vsetvli a0, zero, e32, m2, ta, ma ; RV64-NEXT: vmv.v.i v10, 0 +; RV64-NEXT: lui a0, 61681 +; RV64-NEXT: addi a0, a0, -241 +; RV64-NEXT: vmv.s.x v0, a0 ; RV64-NEXT: vmerge.vvm v8, v10, v8, v0 ; RV64-NEXT: vrgather.vi v10, v8, 2 ; RV64-NEXT: vor.vv v8, v8, v10 @@ -437,11 +438,9 @@ define void @shuffle_3_input_vectors() vscale_range(4,4) { ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv.v.i v8, 1 -; CHECK-NEXT: vsetivli zero, 1, e8, mf8, ta, ma -; CHECK-NEXT: vmv.v.i v0, 6 -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vmv.v.i v16, 0 ; CHECK-NEXT: vsetivli zero, 4, e64, m1, ta, mu +; CHECK-NEXT: vmv.v.i v0, 6 ; CHECK-NEXT: vslidedown.vi v20, v8, 1, v0.t ; CHECK-NEXT: vslideup.vi v20, v9, 3 ; CHECK-NEXT: vslidedown.vi v21, v9, 1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll index c222626a166fe..eb0ee5773962b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-reverse.ll @@ -1161,8 +1161,8 @@ define <32 x half> @reverse_v32f16_2(<16 x half> %a) { ; CHECK-NEXT: vrgather.vv v10, v9, v12 ; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: vrgather.vv v8, v9, v12 ; CHECK-NEXT: addi a0, a0, -32 +; CHECK-NEXT: vrgather.vv v8, v9, v12 ; CHECK-NEXT: vmv.v.v v9, v8 ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma ; CHECK-NEXT: vslidedown.vx v8, v8, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll index 3c28e978842b9..72a62627755dd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll @@ -179,9 +179,9 @@ define void @vnsrl_32_i32(ptr %in, ptr %out) { ; ZVE32F: # %bb.0: # %entry ; ZVE32F-NEXT: vsetivli zero, 
4, e32, m1, ta, ma ; ZVE32F-NEXT: vle32.v v8, (a0) -; ZVE32F-NEXT: vmv.v.i v0, 1 ; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu ; ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; ZVE32F-NEXT: vmv.v.i v0, 1 ; ZVE32F-NEXT: vrgather.vi v9, v8, 1, v0.t ; ZVE32F-NEXT: vse32.v v9, (a1) ; ZVE32F-NEXT: ret @@ -233,9 +233,9 @@ define void @vnsrl_32_float(ptr %in, ptr %out) { ; ZVE32F: # %bb.0: # %entry ; ZVE32F-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; ZVE32F-NEXT: vle32.v v8, (a0) -; ZVE32F-NEXT: vmv.v.i v0, 1 ; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, mu ; ZVE32F-NEXT: vslidedown.vi v9, v8, 2 +; ZVE32F-NEXT: vmv.v.i v0, 1 ; ZVE32F-NEXT: vrgather.vi v9, v8, 1, v0.t ; ZVE32F-NEXT: vse32.v v9, (a1) ; ZVE32F-NEXT: ret @@ -276,9 +276,9 @@ define void @vnsrl_64_i64(ptr %in, ptr %out) { ; V: # %bb.0: # %entry ; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; V-NEXT: vle64.v v8, (a0) -; V-NEXT: vmv.v.i v0, 1 ; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; V-NEXT: vslidedown.vi v9, v8, 2 +; V-NEXT: vmv.v.i v0, 1 ; V-NEXT: vrgather.vi v9, v8, 1, v0.t ; V-NEXT: vse64.v v9, (a1) ; V-NEXT: ret @@ -327,9 +327,9 @@ define void @vnsrl_64_double(ptr %in, ptr %out) { ; V: # %bb.0: # %entry ; V-NEXT: vsetivli zero, 4, e64, m1, ta, ma ; V-NEXT: vle64.v v8, (a0) -; V-NEXT: vmv.v.i v0, 1 ; V-NEXT: vsetivli zero, 2, e64, m1, ta, mu ; V-NEXT: vslidedown.vi v9, v8, 2 +; V-NEXT: vmv.v.i v0, 1 ; V-NEXT: vrgather.vi v9, v8, 1, v0.t ; V-NEXT: vse64.v v9, (a1) ; V-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll index a2d41de5d1853..ba3b994de46f8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-sitofp-vp.ll @@ -390,9 +390,9 @@ declare <32 x double> @llvm.vp.sitofp.v32f64.v32i64(<32 x i64>, <32 x i1>, i32) define <32 x double> @vsitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsitofp_v32f64_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB25_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll index 391117c72ece7..3a3d417868dfe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store-merge-crash.ll @@ -14,8 +14,8 @@ define void @baz() nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lui a0, %hi(foo) ; CHECK-NEXT: addi a1, a0, %lo(foo) -; CHECK-NEXT: lw a1, 4(a1) ; CHECK-NEXT: lw a0, %lo(foo)(a0) +; CHECK-NEXT: lw a1, 4(a1) ; CHECK-NEXT: lui a2, %hi(bar) ; CHECK-NEXT: sw a1, %lo(bar)(a2) ; CHECK-NEXT: addi a1, a2, %lo(bar) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll index 29d9a8a9b060c..0510cac7ffd0e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-asm.ll @@ -638,10 +638,10 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; V-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; V-NEXT: .LBB12_1: # %bb2 ; V-NEXT: # =>This Inner Loop Header: Depth=1 -; V-NEXT: vlse64.v v8, (a1), a3 -; V-NEXT: addi a4, a1, 80 -; V-NEXT: vlse64.v v9, (a4), a3 ; V-NEXT: addi a4, a0, 16 +; 
V-NEXT: addi a5, a1, 80 +; V-NEXT: vlse64.v v8, (a1), a3 +; V-NEXT: vlse64.v v9, (a5), a3 ; V-NEXT: vse64.v v8, (a0) ; V-NEXT: addi a0, a0, 32 ; V-NEXT: vse64.v v9, (a4) @@ -662,6 +662,7 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; ZVE32F-NEXT: mul a6, a3, a5 ; ZVE32F-NEXT: mul a7, a2, a5 ; ZVE32F-NEXT: addi a2, a2, 4 +; ZVE32F-NEXT: addi a3, a3, 4 ; ZVE32F-NEXT: add a6, a1, a6 ; ZVE32F-NEXT: add a7, a1, a7 ; ZVE32F-NEXT: ld t0, 0(a7) @@ -673,7 +674,6 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; ZVE32F-NEXT: sd a7, 16(a0) ; ZVE32F-NEXT: sd a6, 24(a0) ; ZVE32F-NEXT: addi a0, a0, 32 -; ZVE32F-NEXT: addi a3, a3, 4 ; ZVE32F-NEXT: bne a0, a4, .LBB12_1 ; ZVE32F-NEXT: # %bb.2: # %bb18 ; ZVE32F-NEXT: ret @@ -686,10 +686,10 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; OPTZVE32F-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; OPTZVE32F-NEXT: .LBB12_1: # %bb2 ; OPTZVE32F-NEXT: # =>This Inner Loop Header: Depth=1 -; OPTZVE32F-NEXT: vlse64.v v8, (a1), a3 -; OPTZVE32F-NEXT: addi a4, a1, 80 -; OPTZVE32F-NEXT: vlse64.v v9, (a4), a3 ; OPTZVE32F-NEXT: addi a4, a0, 16 +; OPTZVE32F-NEXT: addi a5, a1, 80 +; OPTZVE32F-NEXT: vlse64.v v8, (a1), a3 +; OPTZVE32F-NEXT: vlse64.v v9, (a5), a3 ; OPTZVE32F-NEXT: vse64.v v8, (a0) ; OPTZVE32F-NEXT: addi a0, a0, 32 ; OPTZVE32F-NEXT: vse64.v v9, (a4) @@ -710,6 +710,7 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; OPTV-NEXT: mul a6, a3, a5 ; OPTV-NEXT: mul a7, a2, a5 ; OPTV-NEXT: addi a2, a2, 4 +; OPTV-NEXT: addi a3, a3, 4 ; OPTV-NEXT: add a6, a1, a6 ; OPTV-NEXT: add a7, a1, a7 ; OPTV-NEXT: ld t0, 0(a7) @@ -721,7 +722,6 @@ define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptur ; OPTV-NEXT: sd a7, 16(a0) ; OPTV-NEXT: sd a6, 24(a0) ; OPTV-NEXT: addi a0, a0, 32 -; OPTV-NEXT: addi a3, a3, 4 ; OPTV-NEXT: bne a0, a4, .LBB12_1 ; OPTV-NEXT: # %bb.2: # %bb18 ; OPTV-NEXT: ret @@ -791,14 +791,14 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu ; ZVE32F-NEXT: mul t2, a3, a5 ; ZVE32F-NEXT: mul t3, a2, a5 ; ZVE32F-NEXT: addi a2, a2, 4 -; ZVE32F-NEXT: addi a1, a1, 32 +; ZVE32F-NEXT: addi a3, a3, 4 ; ZVE32F-NEXT: add t2, a0, t2 ; ZVE32F-NEXT: add t3, a0, t3 ; ZVE32F-NEXT: sd a6, 0(t3) ; ZVE32F-NEXT: sd a7, 0(t2) ; ZVE32F-NEXT: sd t0, 80(t3) ; ZVE32F-NEXT: sd t1, 80(t2) -; ZVE32F-NEXT: addi a3, a3, 4 +; ZVE32F-NEXT: addi a1, a1, 32 ; ZVE32F-NEXT: bne a1, a4, .LBB13_1 ; ZVE32F-NEXT: # %bb.2: # %bb18 ; ZVE32F-NEXT: ret @@ -839,14 +839,14 @@ define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocaptu ; OPTV-NEXT: mul t2, a3, a5 ; OPTV-NEXT: mul t3, a2, a5 ; OPTV-NEXT: addi a2, a2, 4 -; OPTV-NEXT: addi a1, a1, 32 +; OPTV-NEXT: addi a3, a3, 4 ; OPTV-NEXT: add t2, a0, t2 ; OPTV-NEXT: add t3, a0, t3 ; OPTV-NEXT: sd a6, 0(t3) ; OPTV-NEXT: sd a7, 0(t2) ; OPTV-NEXT: sd t0, 80(t3) ; OPTV-NEXT: sd t1, 80(t2) -; OPTV-NEXT: addi a3, a3, 4 +; OPTV-NEXT: addi a1, a1, 32 ; OPTV-NEXT: bne a1, a4, .LBB13_1 ; OPTV-NEXT: # %bb.2: # %bb18 ; OPTV-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll index 4b7f82f94f5e4..fe86344ec73fb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpload.ll @@ -609,11 +609,11 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV32-NEXT: 
.LBB47_2: ; CHECK-RV32-NEXT: mul a6, a3, a2 ; CHECK-RV32-NEXT: addi a5, a4, -32 +; CHECK-RV32-NEXT: add a6, a1, a6 ; CHECK-RV32-NEXT: sltu a7, a4, a5 ; CHECK-RV32-NEXT: addi a7, a7, -1 ; CHECK-RV32-NEXT: and a7, a7, a5 ; CHECK-RV32-NEXT: li a5, 16 -; CHECK-RV32-NEXT: add a6, a1, a6 ; CHECK-RV32-NEXT: bltu a7, a5, .LBB47_4 ; CHECK-RV32-NEXT: # %bb.3: ; CHECK-RV32-NEXT: li a7, 16 @@ -636,16 +636,16 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV32-NEXT: add a5, a1, a5 ; CHECK-RV32-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v24, (a5), a2, v0.t +; CHECK-RV32-NEXT: addi a3, a0, 128 ; CHECK-RV32-NEXT: vmv1r.v v0, v8 ; CHECK-RV32-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV32-NEXT: vlse64.v v8, (a1), a2, v0.t -; CHECK-RV32-NEXT: addi a1, a0, 128 -; CHECK-RV32-NEXT: addi a2, a0, 256 +; CHECK-RV32-NEXT: addi a1, a0, 256 ; CHECK-RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-RV32-NEXT: vse64.v v8, (a0) -; CHECK-RV32-NEXT: vse64.v v24, (a1) +; CHECK-RV32-NEXT: vse64.v v24, (a3) ; CHECK-RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV32-NEXT: vse64.v v16, (a2) +; CHECK-RV32-NEXT: vse64.v v16, (a1) ; CHECK-RV32-NEXT: ret ; ; CHECK-RV64-LABEL: strided_load_v33f64: @@ -660,11 +660,11 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV64-NEXT: .LBB47_2: ; CHECK-RV64-NEXT: mul a6, a4, a2 ; CHECK-RV64-NEXT: addi a5, a3, -32 +; CHECK-RV64-NEXT: add a6, a1, a6 ; CHECK-RV64-NEXT: sltu a7, a3, a5 ; CHECK-RV64-NEXT: addi a7, a7, -1 ; CHECK-RV64-NEXT: and a7, a7, a5 ; CHECK-RV64-NEXT: li a5, 16 -; CHECK-RV64-NEXT: add a6, a1, a6 ; CHECK-RV64-NEXT: bltu a7, a5, .LBB47_4 ; CHECK-RV64-NEXT: # %bb.3: ; CHECK-RV64-NEXT: li a7, 16 @@ -687,16 +687,16 @@ define <33 x double> @strided_load_v33f64(ptr %ptr, i64 %stride, <33 x i1> %mask ; CHECK-RV64-NEXT: add a5, a1, a5 ; CHECK-RV64-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v24, (a5), a2, v0.t +; CHECK-RV64-NEXT: addi a4, a0, 128 ; CHECK-RV64-NEXT: vmv1r.v v0, v8 ; CHECK-RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-RV64-NEXT: vlse64.v v8, (a1), a2, v0.t -; CHECK-RV64-NEXT: addi a1, a0, 128 -; CHECK-RV64-NEXT: addi a2, a0, 256 +; CHECK-RV64-NEXT: addi a1, a0, 256 ; CHECK-RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-RV64-NEXT: vse64.v v8, (a0) -; CHECK-RV64-NEXT: vse64.v v24, (a1) +; CHECK-RV64-NEXT: vse64.v v24, (a4) ; CHECK-RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma -; CHECK-RV64-NEXT: vse64.v v16, (a2) +; CHECK-RV64-NEXT: vse64.v v16, (a1) ; CHECK-RV64-NEXT: ret %v = call <33 x double> @llvm.experimental.vp.strided.load.v33f64.p0.i64(ptr %ptr, i64 %stride, <33 x i1> %mask, i32 %evl) ret <33 x double> %v diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll index 7ca329835b7ac..733c850d64011 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-vpstore.ll @@ -472,9 +472,9 @@ define void @strided_store_v32f64(<32 x double> %v, ptr %ptr, i32 signext %strid ; CHECK-NEXT: addi a3, a2, -16 ; CHECK-NEXT: sltu a2, a2, a3 ; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a2, a2, a3 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: and a2, a2, a3 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-NEXT: ret diff --git 
a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll index a91dee1cb245f..dd5630e165f19 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-trunc-vp.ll @@ -55,8 +55,8 @@ define <128 x i7> @vtrunc_v128i7_v128i16(<128 x i16> %a, <128 x i1> %m, i32 zero ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e8, m1, ta, ma ; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: vslidedown.vi v12, v0, 8 +; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: mv a2, a0 ; CHECK-NEXT: bltu a0, a1, .LBB4_2 ; CHECK-NEXT: # %bb.1: @@ -245,64 +245,64 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vslidedown.vi v6, v0, 8 -; CHECK-NEXT: addi a2, a1, 512 -; CHECK-NEXT: addi a3, a1, 640 +; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vi v26, v0, 4 +; CHECK-NEXT: addi a3, a1, 128 +; CHECK-NEXT: addi a2, a1, 640 ; CHECK-NEXT: addi a4, a7, -64 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a2) +; CHECK-NEXT: sltu a2, a7, a4 +; CHECK-NEXT: addi a2, a2, -1 +; CHECK-NEXT: and a4, a2, a4 +; CHECK-NEXT: addi a2, a4, -32 +; CHECK-NEXT: sltu a5, a4, a2 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a5, a5, a2 +; CHECK-NEXT: addi a2, a5, -16 +; CHECK-NEXT: sltu a6, a5, a2 +; CHECK-NEXT: addi a6, a6, -1 +; CHECK-NEXT: and a2, a6, a2 +; CHECK-NEXT: addi a6, a1, 512 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v27, v6, 4 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a3) -; CHECK-NEXT: sltu a3, a7, a4 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v27, 2 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a4, a3, a4 -; CHECK-NEXT: addi a3, a4, -32 -; CHECK-NEXT: sltu a5, a4, a3 -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: and a3, a5, a3 -; CHECK-NEXT: addi a5, a3, -16 -; CHECK-NEXT: sltu a6, a3, a5 -; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma +; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 4 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: addi a5, a1, 128 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 4 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 16 -; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vi v26, v7, 4 -; CHECK-NEXT: bltu a3, a2, .LBB16_2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v8, (a6) +; CHECK-NEXT: bltu a5, a2, .LBB16_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: li a5, 16 ; CHECK-NEXT: .LBB16_2: ; CHECK-NEXT: vmv1r.v v0, v27 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: li a6, 56 -; CHECK-NEXT: mul a5, a5, a6 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v27, v26, 2 
-; CHECK-NEXT: li a5, 64 -; CHECK-NEXT: vsetvli zero, a3, e32, m4, ta, ma +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a3) +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: li a6, 56 +; CHECK-NEXT: mul a3, a3, a6 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma ; CHECK-NEXT: vnsrl.wi v16, v8, 0, v0.t ; CHECK-NEXT: csrr a3, vlenb ; CHECK-NEXT: slli a3, a3, 6 ; CHECK-NEXT: add a3, sp, a3 ; CHECK-NEXT: addi a3, a3, 16 ; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: li a3, 64 ; CHECK-NEXT: mv a6, a7 -; CHECK-NEXT: bltu a7, a5, .LBB16_4 +; CHECK-NEXT: bltu a7, a3, .LBB16_4 ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a6, 64 ; CHECK-NEXT: .LBB16_4: @@ -343,13 +343,13 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: li a6, 16 ; CHECK-NEXT: .LBB16_6: ; CHECK-NEXT: vmv1r.v v0, v26 +; CHECK-NEXT: addi a1, a1, 256 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v26, v6, 2 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a5) ; CHECK-NEXT: addi a5, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a1, 256 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v26, v6, 2 ; CHECK-NEXT: csrr a5, vlenb ; CHECK-NEXT: li t0, 48 ; CHECK-NEXT: mul a5, a5, t0 @@ -369,13 +369,13 @@ define <128 x i32> @vtrunc_v128i32_v128i64(<128 x i64> %a, <128 x i1> %m, i32 ze ; CHECK-NEXT: # %bb.7: ; CHECK-NEXT: li a5, 32 ; CHECK-NEXT: .LBB16_8: +; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) ; CHECK-NEXT: addi a1, a5, -16 ; CHECK-NEXT: sltu a5, a5, a1 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a1, a5, a1 -; CHECK-NEXT: vmv1r.v v0, v26 ; CHECK-NEXT: addi a5, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma @@ -543,8 +543,8 @@ define <32 x i32> @vtrunc_v32i32_v32i64(<32 x i64> %a, <32 x i1> %m, i32 zeroext ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vmv8r.v v24, v8 -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vslidedown.vi v12, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB17_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll index a0d5d2ccc848d..32aeb6300d17d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-uitofp-vp.ll @@ -390,9 +390,9 @@ declare <32 x double> @llvm.vp.uitofp.v32f64.v32i64(<32 x i64>, <32 x i1>, i32) define <32 x double> @vuitofp_v32f64_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vuitofp_v32f64_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB25_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll index 6d9f69f436fc4..8e7f6666fb4ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -84,10 +84,10 @@ define <2 x i16> @mgather_v2i16_align1(<2 x 
ptr> %ptrs, <2 x i1> %m, <2 x i16> % ; RV32-SLOW-NEXT: # %bb.1: # %cond.load ; RV32-SLOW-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-SLOW-NEXT: vmv.x.s a1, v8 -; RV32-SLOW-NEXT: lbu a2, 1(a1) -; RV32-SLOW-NEXT: lbu a1, 0(a1) -; RV32-SLOW-NEXT: slli a2, a2, 8 -; RV32-SLOW-NEXT: or a1, a2, a1 +; RV32-SLOW-NEXT: lbu a2, 0(a1) +; RV32-SLOW-NEXT: lbu a1, 1(a1) +; RV32-SLOW-NEXT: slli a1, a1, 8 +; RV32-SLOW-NEXT: or a1, a1, a2 ; RV32-SLOW-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV32-SLOW-NEXT: vmv.s.x v9, a1 ; RV32-SLOW-NEXT: .LBB4_2: # %else @@ -97,10 +97,10 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> % ; RV32-SLOW-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-SLOW-NEXT: vslidedown.vi v8, v8, 1 ; RV32-SLOW-NEXT: vmv.x.s a0, v8 -; RV32-SLOW-NEXT: lbu a1, 1(a0) -; RV32-SLOW-NEXT: lbu a0, 0(a0) -; RV32-SLOW-NEXT: slli a1, a1, 8 -; RV32-SLOW-NEXT: or a0, a1, a0 +; RV32-SLOW-NEXT: lbu a1, 0(a0) +; RV32-SLOW-NEXT: lbu a0, 1(a0) +; RV32-SLOW-NEXT: slli a0, a0, 8 +; RV32-SLOW-NEXT: or a0, a0, a1 ; RV32-SLOW-NEXT: vmv.s.x v8, a0 ; RV32-SLOW-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; RV32-SLOW-NEXT: vslideup.vi v9, v8, 1 @@ -118,10 +118,10 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> % ; RV64-SLOW-NEXT: # %bb.1: # %cond.load ; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-SLOW-NEXT: vmv.x.s a1, v8 -; RV64-SLOW-NEXT: lbu a2, 1(a1) -; RV64-SLOW-NEXT: lbu a1, 0(a1) -; RV64-SLOW-NEXT: slli a2, a2, 8 -; RV64-SLOW-NEXT: or a1, a2, a1 +; RV64-SLOW-NEXT: lbu a2, 0(a1) +; RV64-SLOW-NEXT: lbu a1, 1(a1) +; RV64-SLOW-NEXT: slli a1, a1, 8 +; RV64-SLOW-NEXT: or a1, a1, a2 ; RV64-SLOW-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64-SLOW-NEXT: vmv.s.x v9, a1 ; RV64-SLOW-NEXT: .LBB4_2: # %else @@ -131,10 +131,10 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> % ; RV64-SLOW-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-SLOW-NEXT: vslidedown.vi v8, v8, 1 ; RV64-SLOW-NEXT: vmv.x.s a0, v8 -; RV64-SLOW-NEXT: lbu a1, 1(a0) -; RV64-SLOW-NEXT: lbu a0, 0(a0) -; RV64-SLOW-NEXT: slli a1, a1, 8 -; RV64-SLOW-NEXT: or a0, a1, a0 +; RV64-SLOW-NEXT: lbu a1, 0(a0) +; RV64-SLOW-NEXT: lbu a0, 1(a0) +; RV64-SLOW-NEXT: slli a0, a0, 8 +; RV64-SLOW-NEXT: or a0, a0, a1 ; RV64-SLOW-NEXT: vmv.s.x v8, a0 ; RV64-SLOW-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; RV64-SLOW-NEXT: vslideup.vi v9, v8, 1 @@ -204,10 +204,10 @@ define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> % ; RV64-SLOW-NEXT: # %bb.1: # %cond.load ; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-SLOW-NEXT: vmv.x.s a1, v8 -; RV64-SLOW-NEXT: lwu a2, 4(a1) -; RV64-SLOW-NEXT: lwu a1, 0(a1) -; RV64-SLOW-NEXT: slli a2, a2, 32 -; RV64-SLOW-NEXT: or a1, a2, a1 +; RV64-SLOW-NEXT: lwu a2, 0(a1) +; RV64-SLOW-NEXT: lwu a1, 4(a1) +; RV64-SLOW-NEXT: slli a1, a1, 32 +; RV64-SLOW-NEXT: or a1, a1, a2 ; RV64-SLOW-NEXT: vmv.s.x v9, a1 ; RV64-SLOW-NEXT: .LBB5_2: # %else ; RV64-SLOW-NEXT: andi a0, a0, 2 @@ -216,10 +216,10 @@ define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> % ; RV64-SLOW-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-SLOW-NEXT: vslidedown.vi v8, v8, 1 ; RV64-SLOW-NEXT: vmv.x.s a0, v8 -; RV64-SLOW-NEXT: lwu a1, 4(a0) -; RV64-SLOW-NEXT: lwu a0, 0(a0) -; RV64-SLOW-NEXT: slli a1, a1, 32 -; RV64-SLOW-NEXT: or a0, a1, a0 +; RV64-SLOW-NEXT: lwu a1, 0(a0) +; RV64-SLOW-NEXT: lwu a0, 4(a0) +; RV64-SLOW-NEXT: slli a0, a0, 32 +; RV64-SLOW-NEXT: or a0, a0, a1 ; RV64-SLOW-NEXT: vmv.s.x v8, a0 ; 
RV64-SLOW-NEXT: vslideup.vi v9, v8, 1 ; RV64-SLOW-NEXT: .LBB5_4: # %else2 @@ -489,12 +489,12 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi ; RV32-SLOW-NEXT: # implicit-def: $v8 ; RV32-SLOW-NEXT: beqz a3, .LBB8_2 ; RV32-SLOW-NEXT: # %bb.1: # %cond.load -; RV32-SLOW-NEXT: lbu a3, 1(a0) -; RV32-SLOW-NEXT: lbu a4, 0(a0) +; RV32-SLOW-NEXT: lbu a3, 0(a0) +; RV32-SLOW-NEXT: lbu a4, 1(a0) ; RV32-SLOW-NEXT: lbu a5, 2(a0) ; RV32-SLOW-NEXT: lbu a6, 3(a0) -; RV32-SLOW-NEXT: slli a3, a3, 8 -; RV32-SLOW-NEXT: or a3, a3, a4 +; RV32-SLOW-NEXT: slli a4, a4, 8 +; RV32-SLOW-NEXT: or a3, a4, a3 ; RV32-SLOW-NEXT: slli a5, a5, 16 ; RV32-SLOW-NEXT: slli a6, a6, 24 ; RV32-SLOW-NEXT: or a4, a6, a5 @@ -505,12 +505,12 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi ; RV32-SLOW-NEXT: andi a2, a2, 2 ; RV32-SLOW-NEXT: beqz a2, .LBB8_4 ; RV32-SLOW-NEXT: # %bb.3: # %cond.load1 -; RV32-SLOW-NEXT: lbu a2, 5(a0) -; RV32-SLOW-NEXT: lbu a3, 4(a0) +; RV32-SLOW-NEXT: lbu a2, 4(a0) +; RV32-SLOW-NEXT: lbu a3, 5(a0) ; RV32-SLOW-NEXT: lbu a4, 6(a0) ; RV32-SLOW-NEXT: lbu a0, 7(a0) -; RV32-SLOW-NEXT: slli a2, a2, 8 -; RV32-SLOW-NEXT: or a2, a2, a3 +; RV32-SLOW-NEXT: slli a3, a3, 8 +; RV32-SLOW-NEXT: or a2, a3, a2 ; RV32-SLOW-NEXT: slli a4, a4, 16 ; RV32-SLOW-NEXT: slli a0, a0, 24 ; RV32-SLOW-NEXT: or a0, a0, a4 @@ -533,12 +533,12 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi ; RV64-SLOW-NEXT: # implicit-def: $v8 ; RV64-SLOW-NEXT: beqz a3, .LBB8_2 ; RV64-SLOW-NEXT: # %bb.1: # %cond.load -; RV64-SLOW-NEXT: lbu a3, 1(a0) -; RV64-SLOW-NEXT: lbu a4, 0(a0) +; RV64-SLOW-NEXT: lbu a3, 0(a0) +; RV64-SLOW-NEXT: lbu a4, 1(a0) ; RV64-SLOW-NEXT: lbu a5, 2(a0) ; RV64-SLOW-NEXT: lb a6, 3(a0) -; RV64-SLOW-NEXT: slli a3, a3, 8 -; RV64-SLOW-NEXT: or a3, a3, a4 +; RV64-SLOW-NEXT: slli a4, a4, 8 +; RV64-SLOW-NEXT: or a3, a4, a3 ; RV64-SLOW-NEXT: slli a5, a5, 16 ; RV64-SLOW-NEXT: slli a6, a6, 24 ; RV64-SLOW-NEXT: or a4, a6, a5 @@ -549,12 +549,12 @@ define void @masked_load_v2i32_align1(ptr %a, <2 x i32> %m, ptr %res_ptr) nounwi ; RV64-SLOW-NEXT: andi a2, a2, 2 ; RV64-SLOW-NEXT: beqz a2, .LBB8_4 ; RV64-SLOW-NEXT: # %bb.3: # %cond.load1 -; RV64-SLOW-NEXT: lbu a2, 5(a0) -; RV64-SLOW-NEXT: lbu a3, 4(a0) +; RV64-SLOW-NEXT: lbu a2, 4(a0) +; RV64-SLOW-NEXT: lbu a3, 5(a0) ; RV64-SLOW-NEXT: lbu a4, 6(a0) ; RV64-SLOW-NEXT: lb a0, 7(a0) -; RV64-SLOW-NEXT: slli a2, a2, 8 -; RV64-SLOW-NEXT: or a2, a2, a3 +; RV64-SLOW-NEXT: slli a3, a3, 8 +; RV64-SLOW-NEXT: or a2, a3, a2 ; RV64-SLOW-NEXT: slli a4, a4, 16 ; RV64-SLOW-NEXT: slli a0, a0, 24 ; RV64-SLOW-NEXT: or a0, a0, a4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll index 7ee8179acfdb9..e56b7c75c41d1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vadd-vp.ll @@ -366,12 +366,12 @@ define <256 x i8> @vadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: addi a3, a1, -128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) -; CHECK-NEXT: addi a0, a1, -128 -; CHECK-NEXT: sltu a3, a1, a0 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a0, a3, a0 +; CHECK-NEXT: sltu a0, a1, a3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a3 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vadd.vi v16, v16, -1, v0.t ; 
CHECK-NEXT: bltu a1, a2, .LBB32_2 @@ -1357,9 +1357,9 @@ declare <32 x i64> @llvm.vp.add.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vadd_vx_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB108_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll index fa82065f3b413..9678fa87dc9b1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vcopysign-vp.ll @@ -298,46 +298,36 @@ define <32 x double> @vfsgnj_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 +; CHECK-NEXT: li a3, 16 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 2 -; CHECK-NEXT: bltu a2, a1, .LBB26_2 +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: bltu a2, a3, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfsgnj.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfsgnj.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfsgnj.vv v16, v16, v24, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll index 08f486b601328..990cf03a2e9b5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfabs-vp.ll @@ -363,9 +363,9 @@ declare 
<32 x double> @llvm.vp.fabs.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vfabs_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfabs_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll index bde842dcc7600..a6c51ced93ddc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfma-vp.ll @@ -849,35 +849,35 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a2, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a2) -; CHECK-NEXT: addi a2, a0, 128 -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: addi a3, a2, 128 +; CHECK-NEXT: addi a5, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: bltu a4, a1, .LBB50_2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a2) +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: vle64.v v8, (a3) +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a5) +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: bltu a4, a2, .LBB50_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB50_2: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 @@ -893,16 +893,16 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v8, 
(a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 @@ -941,26 +941,26 @@ define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> % ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a2, 128 +; CHECK-NEXT: addi a3, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a2) -; CHECK-NEXT: addi a2, a0, 128 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vle64.v v8, (a1) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v24, (a2) +; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: vle64.v v24, (a3) ; CHECK-NEXT: vle64.v v0, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a4 -; CHECK-NEXT: bltu a4, a1, .LBB51_2 +; CHECK-NEXT: bltu a4, a2, .LBB51_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB51_2: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v0, v8, v16 ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll index b37c47a32ba21..13c8077a84c56 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmax-vp.ll @@ -390,46 +390,36 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 +; CHECK-NEXT: li a3, 16 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 2 -; CHECK-NEXT: bltu a2, a1, .LBB26_2 +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: bltu a2, a3, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmax.vv v8, v8, v16, v0.t +; CHECK-NEXT: 
vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfmax.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmax.vv v16, v16, v24, v0.t +; CHECK-NEXT: vfmax.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll index 261523e8ace50..fd43b8bbaf185 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmin-vp.ll @@ -390,46 +390,36 @@ define <32 x double> @vfmin_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v7, v0, 2 +; CHECK-NEXT: li a3, 16 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v16, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 2 -; CHECK-NEXT: bltu a2, a1, .LBB26_2 +; CHECK-NEXT: mv a1, a2 +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: bltu a2, a3, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB26_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmin.vv v8, v8, v16, v0.t +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfmin.vv v8, v8, v24, v0.t ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a0, a1, a0 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmin.vv v16, v16, v24, v0.t +; CHECK-NEXT: vfmin.vv v16, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; 
CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll index a5d9b3439e29b..eb4ce757a8385 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfmuladd-vp.ll @@ -621,35 +621,35 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a2, 128 -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v24, (a2) -; CHECK-NEXT: addi a2, a0, 128 -; CHECK-NEXT: vle64.v v8, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v8, (a2) -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a4 +; CHECK-NEXT: addi a3, a2, 128 +; CHECK-NEXT: addi a5, a0, 128 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v7, v0, 2 -; CHECK-NEXT: bltu a4, a1, .LBB50_2 +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vle64.v v16, (a2) +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: vle64.v v8, (a3) +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a5) +; CHECK-NEXT: addi a3, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: bltu a4, a2, .LBB50_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB50_2: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 @@ -665,16 +665,16 @@ define <32 x double> @vfma_vv_v32f64(<32 x double> %va, <32 x double> %b, <32 x ; CHECK-NEXT: mul a1, a1, a2 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v8, v16, v24, v0.t +; CHECK-NEXT: vfmadd.vv v8, v24, v16, v0.t ; CHECK-NEXT: vmv.v.v v16, v8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 @@ -713,26 +713,26 @@ define <32 x double> @vfma_vv_v32f64_unmasked(<32 x double> %va, <32 x double> % ; CHECK-NEXT: addi a1, 
a1, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a2, 128 +; CHECK-NEXT: addi a3, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a2) -; CHECK-NEXT: addi a2, a0, 128 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vle64.v v8, (a1) ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle64.v v24, (a2) +; CHECK-NEXT: mv a1, a4 +; CHECK-NEXT: vle64.v v24, (a3) ; CHECK-NEXT: vle64.v v0, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a4 -; CHECK-NEXT: bltu a4, a1, .LBB51_2 +; CHECK-NEXT: bltu a4, a2, .LBB51_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB51_2: -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vfmadd.vv v0, v8, v16 ; CHECK-NEXT: addi a0, a4, -16 ; CHECK-NEXT: sltu a1, a4, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll index 968fd9f9bab80..a3853d19c3ef9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfneg-vp.ll @@ -355,9 +355,9 @@ declare <32 x double> @llvm.vp.fneg.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vfneg_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfneg_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll index 6244419de65b1..d87c1e332ce65 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vfsqrt-vp.ll @@ -379,9 +379,9 @@ declare <32 x double> @llvm.vp.sqrt.v32f64(<32 x double>, <32 x i1>, i32) define <32 x double> @vfsqrt_vv_v32f64(<32 x double> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vfsqrt_vv_v32f64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB26_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll index fec54b36042fa..28ac46cd5fc88 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmax-vp.ll @@ -270,12 +270,12 @@ define <256 x i8> @vmax_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zero ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: addi a4, a2, -128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) -; CHECK-NEXT: addi a1, a2, -128 -; CHECK-NEXT: sltu a4, a2, a1 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a1, a4, a1 +; CHECK-NEXT: sltu a1, a2, a4 +; CHECK-NEXT: addi a1, a1, -1 
+; CHECK-NEXT: and a1, a1, a4 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmax.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB22_2 @@ -1029,9 +1029,9 @@ declare <32 x i64> @llvm.vp.smax.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmax_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmax_vx_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB74_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll index 7ca0dbd9adffc..b7555f4b3588b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmaxu-vp.ll @@ -269,12 +269,12 @@ define <256 x i8> @vmaxu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zer ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: addi a4, a2, -128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) -; CHECK-NEXT: addi a1, a2, -128 -; CHECK-NEXT: sltu a4, a2, a1 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a1, a4, a1 +; CHECK-NEXT: sltu a1, a2, a4 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a4 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmaxu.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB22_2 @@ -1028,9 +1028,9 @@ declare <32 x i64> @llvm.vp.umax.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmaxu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmaxu_vx_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB74_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll index ea75742ca6e43..bd49b9876575e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vmin-vp.ll @@ -270,12 +270,12 @@ define <256 x i8> @vmin_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zero ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: addi a4, a2, -128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) -; CHECK-NEXT: addi a1, a2, -128 -; CHECK-NEXT: sltu a4, a2, a1 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a1, a4, a1 +; CHECK-NEXT: sltu a1, a2, a4 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a4 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vmin.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB22_2 @@ -1029,9 +1029,9 @@ declare <32 x i64> @llvm.vp.smin.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vmin_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vmin_vx_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB74_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll index 
f4f54db64018d..f6e5fd42f07ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vminu-vp.ll @@ -269,12 +269,12 @@ define <256 x i8> @vminu_vx_v258i8(<256 x i8> %va, i8 %b, <256 x i1> %m, i32 zer ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a3, 128 +; CHECK-NEXT: addi a4, a2, -128 ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a1) -; CHECK-NEXT: addi a1, a2, -128 -; CHECK-NEXT: sltu a4, a2, a1 -; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: and a1, a4, a1 +; CHECK-NEXT: sltu a1, a2, a4 +; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a1, a1, a4 ; CHECK-NEXT: vsetvli zero, a1, e8, m8, ta, ma ; CHECK-NEXT: vminu.vx v16, v16, a0, v0.t ; CHECK-NEXT: bltu a2, a3, .LBB22_2 @@ -1028,9 +1028,9 @@ declare <32 x i64> @llvm.vp.umin.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i32) define <32 x i64> @vminu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vminu_vx_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB74_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll index 1f6513ae09d60..36cc8dd25bf94 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpgather.ll @@ -2052,13 +2052,13 @@ define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroex ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v24, (zero), v8, v0.t ; RV32-NEXT: addi a1, a0, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v8, 16 ; RV32-NEXT: sltu a0, a0, a1 ; RV32-NEXT: addi a0, a0, -1 ; RV32-NEXT: and a0, a0, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v8, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: vmv8r.v v8, v24 @@ -2077,9 +2077,9 @@ define <32 x double> @vpgather_v32f64(<32 x ptr> %ptrs, <32 x i1> %m, i32 zeroex ; RV64-NEXT: addi a1, a0, -16 ; RV64-NEXT: sltu a0, a0, a1 ; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a0, a0, a1 ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (zero), v16, v0.t ; RV64-NEXT: ret @@ -2093,8 +2093,8 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsext.vf4 v16, v8 -; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB95_2 ; RV32-NEXT: # %bb.1: @@ -2103,13 +2103,13 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; 
RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2119,11 +2119,11 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v10, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: vsext.vf8 v16, v8 +; RV64-NEXT: vsext.vf8 v24, v10 +; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsext.vf8 v16, v10 -; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB95_2 ; RV64-NEXT: # %bb.1: @@ -2134,9 +2134,9 @@ define <32 x double> @vpgather_baseidx_v32i8_v32f64(ptr %base, <32 x i8> %idxs, ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2151,8 +2151,8 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-NEXT: li a2, 32 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsext.vf4 v16, v8 -; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsll.vi v16, v16, 3 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB96_2 ; RV32-NEXT: # %bb.1: @@ -2161,13 +2161,13 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2175,14 +2175,14 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64-LABEL: vpgather_baseidx_sext_v32i8_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: vsext.vf8 v16, v8 ; RV64-NEXT: vsetivli zero, 16, e8, m2, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 16 -; RV64-NEXT: li a3, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsext.vf8 v24, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: li a3, 16 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB96_2 ; RV64-NEXT: # %bb.1: @@ -2193,9 +2193,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, 
mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2210,11 +2210,11 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 ; RV32-NEXT: li a3, 8 -; RV32-NEXT: li a4, 16 ; RV32-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; RV32-NEXT: vwmulu.vx v16, v8, a3 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a4, .LBB97_2 +; RV32-NEXT: bltu a1, a3, .LBB97_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB97_2: @@ -2225,9 +2225,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2236,11 +2236,11 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64: # %bb.0: ; RV64-NEXT: li a2, 32 ; RV64-NEXT: li a3, 8 -; RV64-NEXT: li a4, 16 ; RV64-NEXT: vsetvli zero, a2, e8, m2, ta, ma ; RV64-NEXT: vwmulu.vx v16, v8, a3 +; RV64-NEXT: li a3, 16 ; RV64-NEXT: mv a2, a1 -; RV64-NEXT: bltu a1, a4, .LBB97_2 +; RV64-NEXT: bltu a1, a3, .LBB97_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB97_2: @@ -2251,9 +2251,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i8_v32f64(ptr %base, <32 x i8> %i ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei16.v v16, (a0), v24, v0.t ; RV64-NEXT: ret @@ -2268,11 +2268,11 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 ; RV32-NEXT: li a3, 8 -; RV32-NEXT: li a4, 16 ; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; RV32-NEXT: vwmulsu.vx v16, v8, a3 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a4, .LBB98_2 +; RV32-NEXT: bltu a1, a3, .LBB98_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB98_2: @@ -2283,9 +2283,9 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2295,11 +2295,11 @@ define <32 x double> @vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma ; RV64-NEXT: vslidedown.vi v12, v8, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsext.vf4 v16, v8 +; RV64-NEXT: vsext.vf4 v24, v12 +; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsll.vi v16, v24, 3 ; RV64-NEXT: li a3, 16 -; RV64-NEXT: vsext.vf4 v16, v12 -; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: vsll.vi v8, v24, 3 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB98_2 ; RV64-NEXT: # %bb.1: @@ -2310,9 +2310,9 @@ define <32 x double> 
@vpgather_baseidx_v32i16_v32f64(ptr %base, <32 x i16> %idxs ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2326,11 +2326,11 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 ; RV32-NEXT: li a3, 8 -; RV32-NEXT: li a4, 16 ; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; RV32-NEXT: vwmulsu.vx v16, v8, a3 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a4, .LBB99_2 +; RV32-NEXT: bltu a1, a3, .LBB99_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB99_2: @@ -2341,9 +2341,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2351,14 +2351,14 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64-LABEL: vpgather_baseidx_sext_v32i16_v32f64: ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsext.vf4 v16, v8 ; RV64-NEXT: vsetivli zero, 16, e16, m4, ta, ma ; RV64-NEXT: vslidedown.vi v8, v8, 16 -; RV64-NEXT: li a3, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v16, v8 -; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: vsll.vi v8, v24, 3 +; RV64-NEXT: vsext.vf4 v24, v8 +; RV64-NEXT: vsll.vi v8, v16, 3 +; RV64-NEXT: vsll.vi v16, v24, 3 +; RV64-NEXT: li a3, 16 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB99_2 ; RV64-NEXT: # %bb.1: @@ -2369,9 +2369,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2386,11 +2386,11 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 ; RV32-NEXT: li a3, 8 -; RV32-NEXT: li a4, 16 ; RV32-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; RV32-NEXT: vwmulu.vx v16, v8, a3 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a2, a1 -; RV32-NEXT: bltu a1, a4, .LBB100_2 +; RV32-NEXT: bltu a1, a3, .LBB100_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a2, 16 ; RV32-NEXT: .LBB100_2: @@ -2401,9 +2401,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV32-NEXT: addi a2, a1, -16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV32-NEXT: vslidedown.vi v0, v0, 2 -; RV32-NEXT: and a1, a1, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2412,11 +2412,11 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64: # %bb.0: ; RV64-NEXT: li a2, 32 ; RV64-NEXT: li a3, 8 
-; RV64-NEXT: li a4, 16 ; RV64-NEXT: vsetvli zero, a2, e16, m4, ta, ma ; RV64-NEXT: vwmulu.vx v16, v8, a3 +; RV64-NEXT: li a3, 16 ; RV64-NEXT: mv a2, a1 -; RV64-NEXT: bltu a1, a4, .LBB100_2 +; RV64-NEXT: bltu a1, a3, .LBB100_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a2, 16 ; RV64-NEXT: .LBB100_2: @@ -2427,9 +2427,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i16_v32f64(ptr %base, <32 x i16> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV64-NEXT: ret @@ -2443,9 +2443,9 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV32-LABEL: vpgather_baseidx_v32i32_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB101_2 ; RV32-NEXT: # %bb.1: @@ -2454,13 +2454,13 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2470,10 +2470,10 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV64-NEXT: vslidedown.vi v24, v8, 16 ; RV64-NEXT: li a2, 8 -; RV64-NEXT: li a3, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwmulsu.vx v16, v24, a2 ; RV64-NEXT: vwmulsu.vx v24, v8, a2 +; RV64-NEXT: li a3, 16 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB101_2 ; RV64-NEXT: # %bb.1: @@ -2484,9 +2484,9 @@ define <32 x double> @vpgather_baseidx_v32i32_v32f64(ptr %base, <32 x i32> %idxs ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2499,9 +2499,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-LABEL: vpgather_baseidx_sext_v32i32_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB102_2 ; RV32-NEXT: # %bb.1: @@ -2510,13 +2510,13 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 
16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2526,10 +2526,10 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV64-NEXT: vslidedown.vi v24, v8, 16 ; RV64-NEXT: li a2, 8 -; RV64-NEXT: li a3, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwmulsu.vx v16, v24, a2 ; RV64-NEXT: vwmulsu.vx v24, v8, a2 +; RV64-NEXT: li a3, 16 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB102_2 ; RV64-NEXT: # %bb.1: @@ -2540,9 +2540,9 @@ define <32 x double> @vpgather_baseidx_sext_v32i32_v32f64(ptr %base, <32 x i32> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2556,9 +2556,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-LABEL: vpgather_baseidx_zext_v32i32_v32f64: ; RV32: # %bb.0: ; RV32-NEXT: li a2, 32 -; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vsll.vi v16, v8, 3 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a2, a1 ; RV32-NEXT: bltu a1, a3, .LBB103_2 ; RV32-NEXT: # %bb.1: @@ -2567,13 +2567,13 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v8, (a0), v16, v0.t ; RV32-NEXT: addi a2, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v24, v16, 16 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a1, a1, a2 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v24, v16, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v24, v0.t ; RV32-NEXT: ret @@ -2583,10 +2583,10 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV64-NEXT: vslidedown.vi v24, v8, 16 ; RV64-NEXT: li a2, 8 -; RV64-NEXT: li a3, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwmulu.vx v16, v24, a2 ; RV64-NEXT: vwmulu.vx v24, v8, a2 +; RV64-NEXT: li a3, 16 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB103_2 ; RV64-NEXT: # %bb.1: @@ -2597,9 +2597,9 @@ define <32 x double> @vpgather_baseidx_zext_v32i32_v32f64(ptr %base, <32 x i32> ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret @@ -2618,16 +2618,16 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV32-NEXT: vnsrl.wi v16, v8, 0 ; 
RV32-NEXT: li a2, 32 ; RV32-NEXT: addi a3, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; RV32-NEXT: vslideup.vi v16, v24, 16 ; RV32-NEXT: vsll.vi v24, v16, 3 ; RV32-NEXT: sltu a2, a1, a3 ; RV32-NEXT: addi a2, a2, -1 +; RV32-NEXT: and a2, a2, a3 ; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma ; RV32-NEXT: vslidedown.vi v8, v24, 16 -; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: li a2, 16 @@ -2644,8 +2644,8 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV64: # %bb.0: ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: li a3, 16 ; RV64-NEXT: vsll.vi v8, v8, 3 +; RV64-NEXT: li a3, 16 ; RV64-NEXT: mv a2, a1 ; RV64-NEXT: bltu a1, a3, .LBB104_2 ; RV64-NEXT: # %bb.1: @@ -2656,9 +2656,9 @@ define <32 x double> @vpgather_baseidx_v32f64(ptr %base, <32 x i64> %idxs, <32 x ; RV64-NEXT: addi a2, a1, -16 ; RV64-NEXT: sltu a1, a1, a2 ; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a1, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll index 6c9989775f790..4f3179823f5b0 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpload.ll @@ -374,12 +374,12 @@ define <32 x double> @vpload_v32f64(ptr %ptr, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v8, (a0), v0.t ; CHECK-NEXT: addi a2, a1, -16 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 -; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a0), v0.t ; CHECK-NEXT: ret @@ -403,12 +403,12 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: .LBB32_2: ; CHECK-NEXT: addi a4, a3, -16 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v8, 2 ; CHECK-NEXT: sltu a3, a3, a4 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a3, a3, a4 ; CHECK-NEXT: addi a4, a1, 128 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v8, 2 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: addi a3, a2, -32 @@ -420,9 +420,9 @@ define <33 x double> @vpload_v33f64(ptr %ptr, <33 x i1> %m, i32 zeroext %evl) { ; CHECK-NEXT: # %bb.3: ; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: .LBB32_4: +; CHECK-NEXT: addi a5, a1, 256 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v8, 4 -; CHECK-NEXT: addi a5, a1, 256 ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v24, (a5), v0.t ; CHECK-NEXT: bltu a2, a3, .LBB32_6 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll index 
6394542479d1b..c6e64fe2bd32e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpmerge.ll @@ -1360,22 +1360,22 @@ define <32 x double> @vpmerge_vv_v32f64(<32 x double> %va, <32 x double> %vb, <3 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vmv8r.v v16, v8 ; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: li a3, 16 ; CHECK-NEXT: vle64.v v24, (a1) +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: vle64.v v8, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: bltu a2, a1, .LBB83_2 +; CHECK-NEXT: bltu a2, a3, .LBB83_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB83_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, tu, ma ; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma @@ -1406,9 +1406,9 @@ define <32 x double> @vpmerge_vf_v32f64(double %a, <32 x double> %vb, <32 x i1> ; CHECK-NEXT: addi a1, a0, -16 ; CHECK-NEXT: sltu a0, a0, a1 ; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: and a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, tu, ma ; CHECK-NEXT: vfmerge.vfm v16, v16, fa0, v0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll index f7e4716d2c847..cf5650c0ab4ed 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpscatter.ll @@ -1756,13 +1756,13 @@ define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (zero), v24, v0.t ; RV32-NEXT: addi a0, a1, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: sltu a1, a1, a0 ; RV32-NEXT: addi a1, a1, -1 ; RV32-NEXT: and a0, a1, a0 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: ret @@ -1778,23 +1778,23 @@ define void @vpscatter_v32f64(<32 x double> %val, <32 x ptr> %ptrs, <32 x i1> %m ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: addi a1, a0, 128 +; RV64-NEXT: li a3, 16 ; RV64-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-NEXT: vle64.v v16, (a1) +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: vle64.v v24, (a0) -; RV64-NEXT: li a1, 16 -; RV64-NEXT: mv a0, a2 -; RV64-NEXT: bltu a2, a1, .LBB83_2 +; RV64-NEXT: bltu a2, a3, .LBB83_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: li a0, 16 +; RV64-NEXT: li a1, 16 ; RV64-NEXT: .LBB83_2: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t ; RV64-NEXT: addi a0, a2, -16 ; RV64-NEXT: sltu a1, a2, a0 ; RV64-NEXT: addi a1, a1, -1 +; 
RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a0, a1, a0 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -1816,8 +1816,8 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsll.vi v24, v24, 3 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a1, a2 ; RV32-NEXT: bltu a2, a3, .LBB84_2 ; RV32-NEXT: # %bb.1: @@ -1826,13 +1826,13 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: addi a1, a2, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -1854,14 +1854,14 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV64-NEXT: vle32.v v24, (a1) +; RV64-NEXT: vle32.v v16, (a1) ; RV64-NEXT: li a1, 8 -; RV64-NEXT: li a3, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v24, 16 +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwmulsu.vx v8, v16, a1 ; RV64-NEXT: vwmulsu.vx v16, v24, a1 +; RV64-NEXT: li a3, 16 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB84_2 ; RV64-NEXT: # %bb.1: @@ -1870,20 +1870,20 @@ define void @vpscatter_baseidx_v32i32_v32f64(<32 x double> %val, ptr %base, <32 ; RV64-NEXT: addi a3, sp, 16 ; RV64-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v24, (a0), v16, v0.t +; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 @@ -1902,8 +1902,8 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsll.vi v24, v24, 3 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a1, a2 ; RV32-NEXT: bltu a2, a3, .LBB85_2 ; RV32-NEXT: # %bb.1: @@ -1912,13 +1912,13 @@ define void 
@vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: addi a1, a2, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v8, v0.t ; RV32-NEXT: ret @@ -1940,14 +1940,14 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV64-NEXT: vle32.v v24, (a1) +; RV64-NEXT: vle32.v v16, (a1) ; RV64-NEXT: li a1, 8 -; RV64-NEXT: li a3, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v24, 16 +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwmulsu.vx v8, v16, a1 ; RV64-NEXT: vwmulsu.vx v16, v24, a1 +; RV64-NEXT: li a3, 16 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB85_2 ; RV64-NEXT: # %bb.1: @@ -1956,20 +1956,20 @@ define void @vpscatter_baseidx_sext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: addi a3, sp, 16 ; RV64-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v24, (a0), v16, v0.t +; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 @@ -1989,8 +1989,8 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV32-NEXT: li a3, 32 ; RV32-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; RV32-NEXT: vle32.v v24, (a1) -; RV32-NEXT: li a3, 16 ; RV32-NEXT: vsll.vi v24, v24, 3 +; RV32-NEXT: li a3, 16 ; RV32-NEXT: mv a1, a2 ; RV32-NEXT: bltu a2, a3, .LBB86_2 ; RV32-NEXT: # %bb.1: @@ -1999,13 +1999,13 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v8, (a0), v24, v0.t ; RV32-NEXT: addi a1, a2, -16 -; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vi v0, v0, 2 +; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma +; RV32-NEXT: vslidedown.vi v8, v24, 16 ; RV32-NEXT: sltu a2, a2, a1 ; RV32-NEXT: addi a2, a2, -1 ; RV32-NEXT: and a1, a2, a1 -; RV32-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV32-NEXT: vslidedown.vi v8, v24, 16 +; RV32-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vi v0, v0, 2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), 
v8, v0.t ; RV32-NEXT: ret @@ -2027,14 +2027,14 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: vs8r.v v8, (a3) # Unknown-size Folded Spill ; RV64-NEXT: li a3, 32 ; RV64-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; RV64-NEXT: vle32.v v24, (a1) +; RV64-NEXT: vle32.v v16, (a1) ; RV64-NEXT: li a1, 8 -; RV64-NEXT: li a3, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m8, ta, ma -; RV64-NEXT: vslidedown.vi v16, v24, 16 +; RV64-NEXT: vslidedown.vi v24, v16, 16 ; RV64-NEXT: vsetivli zero, 16, e32, m4, ta, ma ; RV64-NEXT: vwmulu.vx v8, v16, a1 ; RV64-NEXT: vwmulu.vx v16, v24, a1 +; RV64-NEXT: li a3, 16 ; RV64-NEXT: mv a1, a2 ; RV64-NEXT: bltu a2, a3, .LBB86_2 ; RV64-NEXT: # %bb.1: @@ -2043,20 +2043,20 @@ define void @vpscatter_baseidx_zext_v32i32_v32f64(<32 x double> %val, ptr %base, ; RV64-NEXT: addi a3, sp, 16 ; RV64-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v24, (a0), v16, v0.t +; RV64-NEXT: vsoxei64.v v24, (a0), v8, v0.t ; RV64-NEXT: addi a1, a2, -16 ; RV64-NEXT: sltu a2, a2, a1 ; RV64-NEXT: addi a2, a2, -1 +; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vi v0, v0, 2 -; RV64-NEXT: and a1, a2, a1 ; RV64-NEXT: csrr a2, vlenb ; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a2, sp, a2 ; RV64-NEXT: addi a2, a2, 16 -; RV64-NEXT: vl8r.v v16, (a2) # Unknown-size Folded Reload +; RV64-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma -; RV64-NEXT: vsoxei64.v v16, (a0), v8, v0.t +; RV64-NEXT: vsoxei64.v v8, (a0), v16, v0.t ; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: slli a0, a0, 4 ; RV64-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll index d30e8b46e6df2..d3a8e8548f5b4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vpstore.ll @@ -304,12 +304,12 @@ define void @vpstore_v32f64(<32 x double> %val, ptr %ptr, <32 x i1> %m, i32 zero ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a0), v0.t ; CHECK-NEXT: addi a2, a1, -16 -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v0, v0, 2 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: sltu a1, a1, a2 ; CHECK-NEXT: addi a1, a1, -1 ; CHECK-NEXT: and a1, a1, a2 -; CHECK-NEXT: addi a0, a0, 128 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v16, (a0), v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll index 7afd31fdd663c..8a15fa6929708 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsadd-vp.ll @@ -375,12 +375,12 @@ define <256 x i8> @vsadd_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: addi a3, a1, -128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) -; CHECK-NEXT: addi a0, a1, -128 -; CHECK-NEXT: sltu a3, a1, a0 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a0, a3, a0 +; CHECK-NEXT: sltu a0, a1, a3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a3 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vsadd.vi 
v16, v16, -1, v0.t ; CHECK-NEXT: bltu a1, a2, .LBB32_2 @@ -1370,9 +1370,9 @@ declare <32 x i64> @llvm.vp.sadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i define <32 x i64> @vsadd_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsadd_vx_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB108_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll index f61b112fd8024..0f2ff55d767d4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vsaddu-vp.ll @@ -371,12 +371,12 @@ define <256 x i8> @vsaddu_vi_v258i8(<256 x i8> %va, <256 x i1> %m, i32 zeroext % ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma ; CHECK-NEXT: vmv1r.v v24, v0 ; CHECK-NEXT: li a2, 128 +; CHECK-NEXT: addi a3, a1, -128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma ; CHECK-NEXT: vlm.v v0, (a0) -; CHECK-NEXT: addi a0, a1, -128 -; CHECK-NEXT: sltu a3, a1, a0 -; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and a0, a3, a0 +; CHECK-NEXT: sltu a0, a1, a3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: and a0, a0, a3 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma ; CHECK-NEXT: vsaddu.vi v16, v16, -1, v0.t ; CHECK-NEXT: bltu a1, a2, .LBB32_2 @@ -1366,9 +1366,9 @@ declare <32 x i64> @llvm.vp.uadd.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i define <32 x i64> @vsaddu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vsaddu_vx_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB108_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll index dc83edba5ae8c..c5506e175ce00 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vscale-range.ll @@ -8,104 +8,48 @@ define <512 x i8> @vadd_v512i8_zvl128(<512 x i8> %a, <512 x i8> %b) #0 { ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a4, 48 -; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: slli a2, a2, 4 ; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x30, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 48 * vlenb -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 5 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a4, 40 -; CHECK-NEXT: mul a2, a2, a4 +; CHECK-NEXT: slli a2, a2, 3 ; CHECK-NEXT: add a2, sp, a2 ; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: li a2, 128 -; CHECK-NEXT: addi a4, a3, 128 -; CHECK-NEXT: addi a5, a3, 384 +; CHECK-NEXT: addi a4, a3, 256 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v8, (a5) -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a5, 24 -; CHECK-NEXT: mul a2, a2, a5 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, 
a2, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: addi a2, a1, 128 -; CHECK-NEXT: vle8.v v8, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a3, 256 -; CHECK-NEXT: vle8.v v8, (a1) -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: vle8.v v8, (a2) -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vle8.v v24, (a4) +; CHECK-NEXT: addi a2, a3, 384 +; CHECK-NEXT: vle8.v v0, (a1) +; CHECK-NEXT: addi a1, a1, 128 +; CHECK-NEXT: vadd.vv v8, v0, v24 +; CHECK-NEXT: addi a4, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a4) # Unknown-size Folded Spill +; CHECK-NEXT: vle8.v v0, (a2) +; CHECK-NEXT: vle8.v v24, (a1) +; CHECK-NEXT: vadd.vv v24, v24, v0 +; CHECK-NEXT: addi a1, a3, 128 +; CHECK-NEXT: vle8.v v0, (a1) +; CHECK-NEXT: vadd.vv v16, v16, v0 ; CHECK-NEXT: vle8.v v0, (a3) ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v8, v8, v16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v16, v16, v8 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload -; CHECK-NEXT: vadd.vv v24, v8, v24 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 40 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 ; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vadd.vv v0, v8, v0 ; CHECK-NEXT: vse8.v v0, (a0) ; CHECK-NEXT: addi a1, a0, 384 -; CHECK-NEXT: vse8.v v16, (a1) +; CHECK-NEXT: vse8.v v24, (a1) ; CHECK-NEXT: addi a1, a0, 256 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: addi a2, sp, 16 ; CHECK-NEXT: vl8r.v v8, (a2) # Unknown-size Folded Reload ; CHECK-NEXT: vse8.v v8, (a1) ; CHECK-NEXT: addi a0, a0, 128 -; CHECK-NEXT: vse8.v v24, (a0) +; CHECK-NEXT: vse8.v v16, (a0) ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 48 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -121,10 +65,10 @@ define <512 x i8> @vadd_v512i8_zvl256(<512 x i8> %a, <512 x i8> %b) #1 { ; CHECK-NEXT: addi a1, a0, 256 ; CHECK-NEXT: li a2, 256 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v24, (a0) -; CHECK-NEXT: vle8.v v0, (a1) -; CHECK-NEXT: vadd.vv v8, v8, v24 -; CHECK-NEXT: vadd.vv v16, v16, v0 +; 
CHECK-NEXT: vle8.v v24, (a1) +; CHECK-NEXT: vle8.v v0, (a0) +; CHECK-NEXT: vadd.vv v8, v8, v0 +; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: ret %c = add <512 x i8> %a, %b ret <512 x i8> %c diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll index 05254e60b65b7..81c98d6881e72 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect-vp.ll @@ -155,46 +155,30 @@ declare <256 x i8> @llvm.vp.select.v256i8(<256 x i1>, <256 x i8>, <256 x i8>, i3 define <256 x i8> @select_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i32 zeroext %evl) { ; CHECK-LABEL: select_v256i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma -; CHECK-NEXT: vmv1r.v v6, v8 ; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: addi a4, a1, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v24, (a0) +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: addi a0, a3, -128 -; CHECK-NEXT: vle8.v v8, (a4) +; CHECK-NEXT: vle8.v v24, (a4) ; CHECK-NEXT: sltu a4, a3, a0 -; CHECK-NEXT: vle8.v v16, (a1) ; CHECK-NEXT: addi a4, a4, -1 ; CHECK-NEXT: and a0, a4, a0 -; CHECK-NEXT: vmv1r.v v0, v6 ; CHECK-NEXT: vsetvli zero, a0, e8, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 +; CHECK-NEXT: vmerge.vvm v24, v24, v8, v0 +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: bltu a3, a2, .LBB11_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: li a3, 128 ; CHECK-NEXT: .LBB11_2: ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vmv8r.v v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.select.v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i32 %evl) ret <256 x i8> %v @@ -203,58 +187,21 @@ define <256 x i8> @select_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i3 define <256 x i8> @select_evl_v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c) { ; CHECK-LABEL: select_evl_v256i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a2, a2, a3 -; CHECK-NEXT: sub sp, sp, a2 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: slli a2, a2, 4 -; CHECK-NEXT: add a2, sp, a2 -; CHECK-NEXT: addi a2, a2, 16 -; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill +; CHECK-NEXT: vsetivli zero, 1, e8, m1, ta, ma +; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: li a2, 128 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vle8.v v16, 
(a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, a1, 128 ; CHECK-NEXT: vle8.v v24, (a0) -; CHECK-NEXT: vle8.v v16, (a1) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vmv1r.v v9, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, a1, 128 +; CHECK-NEXT: vle8.v v8, (a0) ; CHECK-NEXT: vsetivli zero, 1, e8, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v24, v24, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v9 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmerge.vvm v24, v8, v24, v0 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v16, v8, v0 +; CHECK-NEXT: vle8.v v8, (a1) +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 ; CHECK-NEXT: vmv8r.v v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call <256 x i8> @llvm.vp.select.v256i8(<256 x i1> %a, <256 x i8> %b, <256 x i8> %c, i32 129) ret <256 x i8> %v @@ -418,23 +365,23 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: addi a1, a0, 128 +; CHECK-NEXT: li a3, 16 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a1) +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: vle64.v v24, (a0) -; CHECK-NEXT: li a1, 16 -; CHECK-NEXT: mv a0, a2 -; CHECK-NEXT: bltu a2, a1, .LBB25_2 +; CHECK-NEXT: bltu a2, a3, .LBB25_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 16 +; CHECK-NEXT: li a1, 16 ; CHECK-NEXT: .LBB25_2: -; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: addi a0, a2, -16 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 2 -; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -453,56 +400,16 @@ define <32 x i64> @select_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 define <32 x i64> @select_evl_v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c) { ; CHECK-LABEL: select_evl_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a2, 24 -; CHECK-NEXT: mul a1, a1, a2 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill 
-; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: addi a1, a0, 128 ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v8, (a0) +; CHECK-NEXT: vle64.v v24, (a0) +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: addi a0, a0, 128 ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vle64.v v16, (a1) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vle64.v v24, (a0) ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vi v24, v0, 2 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vslidedown.vi v0, v0, 2 ; CHECK-NEXT: vsetivli zero, 1, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v16, v24, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 ; CHECK-NEXT: ret %v = call <32 x i64> @llvm.vp.select.v32i64(<32 x i1> %a, <32 x i64> %b, <32 x i64> %c, i32 17) ret <32 x i64> %v @@ -621,20 +528,20 @@ define <64 x float> @select_v64f32(<64 x i1> %a, <64 x float> %b, <64 x float> % ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v16, (a1) +; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: vle32.v v24, (a0) -; CHECK-NEXT: mv a0, a2 ; CHECK-NEXT: bltu a2, a3, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a0, 32 +; CHECK-NEXT: li a1, 32 ; CHECK-NEXT: .LBB35_2: -; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 ; CHECK-NEXT: addi a0, a2, -32 ; CHECK-NEXT: sltu a1, a2, a0 ; CHECK-NEXT: addi a1, a1, -1 +; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v0, v0, 4 -; CHECK-NEXT: and a0, a1, a0 ; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll index 557882ee31d4c..75f0119d14c2a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vselect.ll @@ -5,26 +5,26 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vv_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: lbu a2, 0(a2) ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: slli a1, a2, 30 -; RV32-NEXT: andi a4, a2, 1 +; RV32-NEXT: lbu a1, 0(a2) +; RV32-NEXT: slli a2, a1, 30 +; RV32-NEXT: andi a4, a1, 1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.x v10, a4 -; RV32-NEXT: slli a4, a2, 29 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: 
vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 28 +; RV32-NEXT: slli a4, a1, 29 +; RV32-NEXT: srli a2, a2, 31 +; RV32-NEXT: vslide1down.vx v10, v10, a2 +; RV32-NEXT: slli a2, a1, 28 ; RV32-NEXT: srli a4, a4, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a4 -; RV32-NEXT: slli a4, a2, 27 -; RV32-NEXT: srli a2, a2, 5 -; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: slli a4, a1, 27 +; RV32-NEXT: srli a1, a1, 5 +; RV32-NEXT: srli a2, a2, 31 ; RV32-NEXT: srli a4, a4, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: vslide1down.vx v10, v10, a2 +; RV32-NEXT: vslide1down.vx v10, v10, a4 +; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 @@ -35,26 +35,26 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vv_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: lbu a2, 0(a2) ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a1) -; RV64-NEXT: slli a1, a2, 62 -; RV64-NEXT: andi a4, a2, 1 +; RV64-NEXT: lbu a1, 0(a2) +; RV64-NEXT: slli a2, a1, 62 +; RV64-NEXT: andi a4, a1, 1 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.x v10, a4 -; RV64-NEXT: slli a4, a2, 61 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 60 +; RV64-NEXT: slli a4, a1, 61 +; RV64-NEXT: srli a2, a2, 63 +; RV64-NEXT: vslide1down.vx v10, v10, a2 +; RV64-NEXT: slli a2, a1, 60 ; RV64-NEXT: srli a4, a4, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a4 -; RV64-NEXT: slli a4, a2, 59 -; RV64-NEXT: srli a2, a2, 5 -; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: slli a4, a1, 59 +; RV64-NEXT: srli a1, a1, 5 +; RV64-NEXT: srli a2, a2, 63 ; RV64-NEXT: srli a4, a4, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: vslide1down.vx v10, v10, a2 +; RV64-NEXT: vslide1down.vx v10, v10, a4 +; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 @@ -73,26 +73,26 @@ define void @vselect_vv_v6i32(ptr %a, ptr %b, ptr %cc, ptr %z) { define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vx_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: lbu a2, 0(a2) ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: slli a1, a2, 30 -; RV32-NEXT: andi a4, a2, 1 +; RV32-NEXT: lbu a1, 0(a2) +; RV32-NEXT: slli a2, a1, 30 +; RV32-NEXT: andi a4, a1, 1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.x v10, a4 -; RV32-NEXT: slli a4, a2, 29 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 28 +; RV32-NEXT: slli a4, a1, 29 +; RV32-NEXT: srli a2, a2, 31 +; RV32-NEXT: vslide1down.vx v10, v10, a2 +; RV32-NEXT: slli a2, a1, 28 ; RV32-NEXT: srli a4, a4, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a4 -; RV32-NEXT: slli a4, a2, 27 -; RV32-NEXT: srli a2, a2, 5 -; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: slli a4, a1, 27 +; RV32-NEXT: srli a1, a1, 5 +; RV32-NEXT: srli a2, a2, 31 ; RV32-NEXT: srli a4, a4, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: vslide1down.vx v10, v10, a2 +; RV32-NEXT: vslide1down.vx v10, v10, a4 +; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 @@ -103,26 +103,26 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr 
%cc, ptr %z) { ; ; RV64-LABEL: vselect_vx_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: lbu a2, 0(a2) ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a1) -; RV64-NEXT: slli a1, a2, 62 -; RV64-NEXT: andi a4, a2, 1 +; RV64-NEXT: lbu a1, 0(a2) +; RV64-NEXT: slli a2, a1, 62 +; RV64-NEXT: andi a4, a1, 1 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.x v10, a4 -; RV64-NEXT: slli a4, a2, 61 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 60 +; RV64-NEXT: slli a4, a1, 61 +; RV64-NEXT: srli a2, a2, 63 +; RV64-NEXT: vslide1down.vx v10, v10, a2 +; RV64-NEXT: slli a2, a1, 60 ; RV64-NEXT: srli a4, a4, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a4 -; RV64-NEXT: slli a4, a2, 59 -; RV64-NEXT: srli a2, a2, 5 -; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: slli a4, a1, 59 +; RV64-NEXT: srli a1, a1, 5 +; RV64-NEXT: srli a2, a2, 63 ; RV64-NEXT: srli a4, a4, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: vslide1down.vx v10, v10, a2 +; RV64-NEXT: vslide1down.vx v10, v10, a4 +; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 @@ -142,26 +142,26 @@ define void @vselect_vx_v6i32(i32 %a, ptr %b, ptr %cc, ptr %z) { define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vi_v6i32: ; RV32: # %bb.0: -; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: slli a0, a1, 30 -; RV32-NEXT: andi a3, a1, 1 +; RV32-NEXT: lbu a0, 0(a1) +; RV32-NEXT: slli a1, a0, 30 +; RV32-NEXT: andi a3, a0, 1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.x v10, a3 -; RV32-NEXT: slli a3, a1, 29 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: slli a0, a1, 28 +; RV32-NEXT: slli a3, a0, 29 +; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: slli a1, a0, 28 ; RV32-NEXT: srli a3, a3, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a3 -; RV32-NEXT: slli a3, a1, 27 -; RV32-NEXT: srli a1, a1, 5 -; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: slli a3, a0, 27 +; RV32-NEXT: srli a0, a0, 5 +; RV32-NEXT: srli a1, a1, 31 ; RV32-NEXT: srli a3, a3, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: vslide1down.vx v10, v10, a3 +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 @@ -172,26 +172,26 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vi_v6i32: ; RV64: # %bb.0: -; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: slli a0, a1, 62 -; RV64-NEXT: andi a3, a1, 1 +; RV64-NEXT: lbu a0, 0(a1) +; RV64-NEXT: slli a1, a0, 62 +; RV64-NEXT: andi a3, a0, 1 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.x v10, a3 -; RV64-NEXT: slli a3, a1, 61 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: slli a0, a1, 60 +; RV64-NEXT: slli a3, a0, 61 +; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: slli a1, a0, 60 ; RV64-NEXT: srli a3, a3, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a3 -; RV64-NEXT: slli a3, a1, 59 -; RV64-NEXT: srli a1, a1, 5 -; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: slli a3, a0, 59 +; RV64-NEXT: srli 
a0, a0, 5 +; RV64-NEXT: srli a1, a1, 63 ; RV64-NEXT: srli a3, a3, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: vslide1down.vx v10, v10, a3 +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 @@ -210,26 +210,26 @@ define void @vselect_vi_v6i32(ptr %b, ptr %cc, ptr %z) { define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vv_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: lbu a2, 0(a2) ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a1) -; RV32-NEXT: slli a1, a2, 30 -; RV32-NEXT: andi a4, a2, 1 +; RV32-NEXT: lbu a1, 0(a2) +; RV32-NEXT: slli a2, a1, 30 +; RV32-NEXT: andi a4, a1, 1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.x v10, a4 -; RV32-NEXT: slli a4, a2, 29 -; RV32-NEXT: srli a1, a1, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: slli a1, a2, 28 +; RV32-NEXT: slli a4, a1, 29 +; RV32-NEXT: srli a2, a2, 31 +; RV32-NEXT: vslide1down.vx v10, v10, a2 +; RV32-NEXT: slli a2, a1, 28 ; RV32-NEXT: srli a4, a4, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a4 -; RV32-NEXT: slli a4, a2, 27 -; RV32-NEXT: srli a2, a2, 5 -; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: slli a4, a1, 27 +; RV32-NEXT: srli a1, a1, 5 +; RV32-NEXT: srli a2, a2, 31 ; RV32-NEXT: srli a4, a4, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a1 -; RV32-NEXT: vslide1down.vx v10, v10, a4 ; RV32-NEXT: vslide1down.vx v10, v10, a2 +; RV32-NEXT: vslide1down.vx v10, v10, a4 +; RV32-NEXT: vslide1down.vx v10, v10, a1 ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 @@ -240,26 +240,26 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vv_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: lbu a2, 0(a2) ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a1) -; RV64-NEXT: slli a1, a2, 62 -; RV64-NEXT: andi a4, a2, 1 +; RV64-NEXT: lbu a1, 0(a2) +; RV64-NEXT: slli a2, a1, 62 +; RV64-NEXT: andi a4, a1, 1 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.x v10, a4 -; RV64-NEXT: slli a4, a2, 61 -; RV64-NEXT: srli a1, a1, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: slli a1, a2, 60 +; RV64-NEXT: slli a4, a1, 61 +; RV64-NEXT: srli a2, a2, 63 +; RV64-NEXT: vslide1down.vx v10, v10, a2 +; RV64-NEXT: slli a2, a1, 60 ; RV64-NEXT: srli a4, a4, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a4 -; RV64-NEXT: slli a4, a2, 59 -; RV64-NEXT: srli a2, a2, 5 -; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: slli a4, a1, 59 +; RV64-NEXT: srli a1, a1, 5 +; RV64-NEXT: srli a2, a2, 63 ; RV64-NEXT: srli a4, a4, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a1 -; RV64-NEXT: vslide1down.vx v10, v10, a4 ; RV64-NEXT: vslide1down.vx v10, v10, a2 +; RV64-NEXT: vslide1down.vx v10, v10, a4 +; RV64-NEXT: vslide1down.vx v10, v10, a1 ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 @@ -278,26 +278,26 @@ define void @vselect_vv_v6f32(ptr %a, ptr %b, ptr %cc, ptr %z) { define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vx_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: slli a0, a1, 30 -; RV32-NEXT: andi a3, a1, 1 +; RV32-NEXT: lbu a0, 0(a1) +; RV32-NEXT: slli a1, a0, 30 +; RV32-NEXT: andi a3, a0, 1 ; RV32-NEXT: vsetivli 
zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.x v10, a3 -; RV32-NEXT: slli a3, a1, 29 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: slli a0, a1, 28 +; RV32-NEXT: slli a3, a0, 29 +; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: slli a1, a0, 28 ; RV32-NEXT: srli a3, a3, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a3 -; RV32-NEXT: slli a3, a1, 27 -; RV32-NEXT: srli a1, a1, 5 -; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: slli a3, a0, 27 +; RV32-NEXT: srli a0, a0, 5 +; RV32-NEXT: srli a1, a1, 31 ; RV32-NEXT: srli a3, a3, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: vslide1down.vx v10, v10, a3 +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 @@ -308,26 +308,26 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vx_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: slli a0, a1, 62 -; RV64-NEXT: andi a3, a1, 1 +; RV64-NEXT: lbu a0, 0(a1) +; RV64-NEXT: slli a1, a0, 62 +; RV64-NEXT: andi a3, a0, 1 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.x v10, a3 -; RV64-NEXT: slli a3, a1, 61 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: slli a0, a1, 60 +; RV64-NEXT: slli a3, a0, 61 +; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: slli a1, a0, 60 ; RV64-NEXT: srli a3, a3, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a3 -; RV64-NEXT: slli a3, a1, 59 -; RV64-NEXT: srli a1, a1, 5 -; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: slli a3, a0, 59 +; RV64-NEXT: srli a0, a0, 5 +; RV64-NEXT: srli a1, a1, 63 ; RV64-NEXT: srli a3, a3, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: vslide1down.vx v10, v10, a3 +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 @@ -347,26 +347,26 @@ define void @vselect_vx_v6f32(float %a, ptr %b, ptr %cc, ptr %z) { define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; RV32-LABEL: vselect_vfpzero_v6f32: ; RV32: # %bb.0: -; RV32-NEXT: lbu a1, 0(a1) ; RV32-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: slli a0, a1, 30 -; RV32-NEXT: andi a3, a1, 1 +; RV32-NEXT: lbu a0, 0(a1) +; RV32-NEXT: slli a1, a0, 30 +; RV32-NEXT: andi a3, a0, 1 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.x v10, a3 -; RV32-NEXT: slli a3, a1, 29 -; RV32-NEXT: srli a0, a0, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: slli a0, a1, 28 +; RV32-NEXT: slli a3, a0, 29 +; RV32-NEXT: srli a1, a1, 31 +; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: slli a1, a0, 28 ; RV32-NEXT: srli a3, a3, 31 ; RV32-NEXT: vslide1down.vx v10, v10, a3 -; RV32-NEXT: slli a3, a1, 27 -; RV32-NEXT: srli a1, a1, 5 -; RV32-NEXT: srli a0, a0, 31 +; RV32-NEXT: slli a3, a0, 27 +; RV32-NEXT: srli a0, a0, 5 +; RV32-NEXT: srli a1, a1, 31 ; RV32-NEXT: srli a3, a3, 31 -; RV32-NEXT: vslide1down.vx v10, v10, a0 -; RV32-NEXT: vslide1down.vx v10, v10, a3 ; RV32-NEXT: vslide1down.vx v10, v10, a1 +; RV32-NEXT: vslide1down.vx v10, v10, a3 +; RV32-NEXT: vslide1down.vx v10, v10, a0 ; RV32-NEXT: vslidedown.vi v10, v10, 2 ; 
RV32-NEXT: vand.vi v10, v10, 1 ; RV32-NEXT: vmsne.vi v0, v10, 0 @@ -377,26 +377,26 @@ define void @vselect_vfpzero_v6f32(ptr %b, ptr %cc, ptr %z) { ; ; RV64-LABEL: vselect_vfpzero_v6f32: ; RV64: # %bb.0: -; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vsetivli zero, 6, e32, m2, ta, ma ; RV64-NEXT: vle32.v v8, (a0) -; RV64-NEXT: slli a0, a1, 62 -; RV64-NEXT: andi a3, a1, 1 +; RV64-NEXT: lbu a0, 0(a1) +; RV64-NEXT: slli a1, a0, 62 +; RV64-NEXT: andi a3, a0, 1 ; RV64-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; RV64-NEXT: vmv.v.x v10, a3 -; RV64-NEXT: slli a3, a1, 61 -; RV64-NEXT: srli a0, a0, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: slli a0, a1, 60 +; RV64-NEXT: slli a3, a0, 61 +; RV64-NEXT: srli a1, a1, 63 +; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: slli a1, a0, 60 ; RV64-NEXT: srli a3, a3, 63 ; RV64-NEXT: vslide1down.vx v10, v10, a3 -; RV64-NEXT: slli a3, a1, 59 -; RV64-NEXT: srli a1, a1, 5 -; RV64-NEXT: srli a0, a0, 63 +; RV64-NEXT: slli a3, a0, 59 +; RV64-NEXT: srli a0, a0, 5 +; RV64-NEXT: srli a1, a1, 63 ; RV64-NEXT: srli a3, a3, 63 -; RV64-NEXT: vslide1down.vx v10, v10, a0 -; RV64-NEXT: vslide1down.vx v10, v10, a3 ; RV64-NEXT: vslide1down.vx v10, v10, a1 +; RV64-NEXT: vslide1down.vx v10, v10, a3 +; RV64-NEXT: vslide1down.vx v10, v10, a0 ; RV64-NEXT: vslidedown.vi v10, v10, 2 ; RV64-NEXT: vand.vi v10, v10, 1 ; RV64-NEXT: vmsne.vi v0, v10, 0 @@ -415,8 +415,8 @@ define void @vselect_vv_v8i32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vv_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: vse32.v v8, (a3) ; CHECK-NEXT: ret @@ -432,8 +432,8 @@ define void @vselect_vx_v8i32(i32 %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vx_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vse32.v v8, (a3) ; CHECK-NEXT: ret @@ -450,8 +450,8 @@ define void @vselect_vi_v8i32(ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vi_v8i32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vmerge.vim v8, v8, -1, v0 ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: ret @@ -466,8 +466,8 @@ define void @vselect_vv_v8f32(ptr %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vv_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, mu -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle32.v v8, (a0), v0.t ; CHECK-NEXT: vse32.v v8, (a3) ; CHECK-NEXT: ret @@ -483,8 +483,8 @@ define void @vselect_vx_v8f32(float %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vx_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: vse32.v v8, (a2) ; CHECK-NEXT: ret @@ -501,8 +501,8 @@ define void @vselect_vfpzero_v8f32(ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vfpzero_v8f32: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle32.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: vse32.v v8, (a2) ; 
CHECK-NEXT: ret @@ -517,8 +517,8 @@ define void @vselect_vv_v16i16(ptr %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vv_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, mu -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: vse16.v v8, (a3) ; CHECK-NEXT: ret @@ -534,8 +534,8 @@ define void @vselect_vx_v16i16(i16 signext %a, ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vx_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vmerge.vxm v8, v8, a0, v0 ; CHECK-NEXT: vse16.v v8, (a3) ; CHECK-NEXT: ret @@ -552,8 +552,8 @@ define void @vselect_vi_v16i16(ptr %b, ptr %cc, ptr %z) { ; CHECK-LABEL: vselect_vi_v16i16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vmerge.vim v8, v8, 4, v0 ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: ret @@ -569,8 +569,8 @@ define void @vselect_vv_v32f16(ptr %a, ptr %b, ptr %cc, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a4, 32 ; CHECK-NEXT: vsetvli zero, a4, e16, m4, ta, mu -; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a1) +; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: vle16.v v8, (a0), v0.t ; CHECK-NEXT: vse16.v v8, (a3) ; CHECK-NEXT: ret @@ -587,8 +587,8 @@ define void @vselect_vx_v32f16(half %a, ptr %b, ptr %cc, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vfmerge.vfm v8, v8, fa0, v0 ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: ret @@ -606,8 +606,8 @@ define void @vselect_vfpzero_v32f16(ptr %b, ptr %cc, ptr %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a3, 32 ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma -; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vle16.v v8, (a0) +; CHECK-NEXT: vlm.v v0, (a1) ; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 ; CHECK-NEXT: vse16.v v8, (a2) ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll index 6ddf2e464750e..3e64b019643d1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssub-vp.ll @@ -1410,9 +1410,9 @@ declare <32 x i64> @llvm.vp.ssub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i define <32 x i64> @vssub_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vssub_vx_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB108_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll index c403593894794..8ad1fc384364b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vssubu-vp.ll @@ -1405,9 +1405,9 @@ declare <32 x i64> @llvm.vp.usub.sat.v32i64(<32 x i64>, <32 x i64>, <32 x i1>, i define <32 x i64> @vssubu_vx_v32i64(<32 x i64> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vssubu_vx_v32i64: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi 
v24, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB108_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll index d241b78e41391..5a343b35e8fad 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd-mask.ll @@ -41,8 +41,8 @@ define <8 x i64> @vwaddu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 42 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmslt.vx v0, v8, a0 ; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vwaddu.vv v12, v8, v10 ; CHECK-NEXT: vmv4r.v v8, v12 @@ -77,8 +77,8 @@ define <8 x i64> @vwadd_wv_mask_v8i32_nonzero(<8 x i32> %x, <8 x i64> %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 42 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmslt.vx v0, v8, a0 ; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: vmslt.vx v0, v8, a0 ; CHECK-NEXT: vmerge.vvm v16, v10, v8, v0 ; CHECK-NEXT: vwadd.wv v8, v12, v16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll index 50184796b38f5..98188799fcca5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwadd.ll @@ -418,8 +418,8 @@ define <4 x i64> @vwadd_v4i64_v4i32_v4i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwadd_v4i64_v4i32_v4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vsext.vf4 v11, v8 ; CHECK-NEXT: vwadd.vv v8, v10, v11 ; CHECK-NEXT: ret @@ -695,10 +695,10 @@ define <8 x i16> @vwadd_vx_v8i16_i8(ptr %x, ptr %y) { define <8 x i16> @vwadd_vx_v8i16_i16(ptr %x, ptr %y) { ; CHECK-LABEL: vwadd_vx_v8i16_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lh a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: lh a0, 0(a1) +; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret @@ -750,10 +750,10 @@ define <4 x i32> @vwadd_vx_v4i32_i16(ptr %x, ptr %y) { define <4 x i32> @vwadd_vx_v4i32_i32(ptr %x, ptr %y) { ; CHECK-LABEL: vwadd_vx_v4i32_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lw a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: lw a0, 0(a1) +; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwadd.wv v8, v8, v9 ; CHECK-NEXT: ret @@ -824,11 +824,11 @@ define <2 x i64> @vwadd_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwadd_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: lw a2, 0(a1) -; RV32-NEXT: lw a1, 4(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lw a0, 0(a1) +; RV32-NEXT: lw a1, 4(a1) +; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -838,10 +838,10 @@ define <2 x i64> @vwadd_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwadd_vx_v2i64_i64: ; RV64: # %bb.0: -; RV64-NEXT: ld a1, 0(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vmv.v.x v8, a1 +; 
RV64-NEXT: ld a0, 0(a1) +; RV64-NEXT: vmv.v.x v8, a0 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vwadd.wv v8, v8, v9 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll index 98f246b8741dc..b553019568b4f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwaddu.ll @@ -418,8 +418,8 @@ define <4 x i64> @vwaddu_v4i64_v4i32_v4i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwaddu_v4i64_v4i32_v4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vzext.vf4 v11, v8 ; CHECK-NEXT: vwaddu.vv v8, v10, v11 ; CHECK-NEXT: ret @@ -695,10 +695,10 @@ define <8 x i16> @vwaddu_vx_v8i16_i8(ptr %x, ptr %y) { define <8 x i16> @vwaddu_vx_v8i16_i16(ptr %x, ptr %y) { ; CHECK-LABEL: vwaddu_vx_v8i16_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lh a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: lh a0, 0(a1) +; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret @@ -750,10 +750,10 @@ define <4 x i32> @vwaddu_vx_v4i32_i16(ptr %x, ptr %y) { define <4 x i32> @vwaddu_vx_v4i32_i32(ptr %x, ptr %y) { ; CHECK-LABEL: vwaddu_vx_v4i32_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lw a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: lw a0, 0(a1) +; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwaddu.wv v8, v8, v9 ; CHECK-NEXT: ret @@ -866,11 +866,11 @@ define <2 x i64> @vwaddu_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwaddu_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: lw a2, 0(a1) -; RV32-NEXT: lw a1, 4(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lw a0, 0(a1) +; RV32-NEXT: lw a1, 4(a1) +; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -880,10 +880,10 @@ define <2 x i64> @vwaddu_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwaddu_vx_v2i64_i64: ; RV64: # %bb.0: -; RV64-NEXT: ld a1, 0(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: ld a0, 0(a1) +; RV64-NEXT: vmv.v.x v8, a0 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vwaddu.wv v8, v8, v9 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll index eb7be14abe431..115113045548b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmul.ll @@ -454,8 +454,8 @@ define <4 x i64> @vwmul_v4i64_v4i32_v4i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwmul_v4i64_v4i32_v4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vsext.vf4 v11, v8 ; CHECK-NEXT: vwmul.vv v8, v10, v11 ; CHECK-NEXT: ret @@ -859,11 +859,11 @@ define <2 x i64> @vwmul_vx_v2i64_i64(ptr %x, ptr %y) { ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 -; RV32-NEXT: lw a2, 0(a1) -; RV32-NEXT: lw a1, 4(a1) ; 
RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle32.v v8, (a0) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lw a0, 0(a1) +; RV32-NEXT: lw a1, 4(a1) +; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v9, (a0), zero diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll index 8626b25a9d323..ce84e9fa0cbfd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulsu.ll @@ -446,8 +446,8 @@ define <4 x i64> @vwmulsu_v4i64_v4i32_v4i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwmulsu_v4i64_v4i32_v4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vsext.vf4 v11, v8 ; CHECK-NEXT: vwmulsu.vv v8, v11, v10 ; CHECK-NEXT: ret @@ -740,10 +740,10 @@ define <8 x i16> @vwmulsu_vx_v8i16_i8(ptr %x, ptr %y) { define <8 x i16> @vwmulsu_vx_v8i16_i8_swap(ptr %x, ptr %y) { ; CHECK-LABEL: vwmulsu_vx_v8i16_i8_swap: ; CHECK: # %bb.0: -; CHECK-NEXT: lb a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: lb a0, 0(a1) +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vwmulsu.vv v8, v10, v9 ; CHECK-NEXT: ret %a = load <8 x i8>, ptr %x diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll index 007b561a2247a..9adaefd37abab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwmulu.ll @@ -430,8 +430,8 @@ define <4 x i64> @vwmulu_v4i64_v4i32_v4i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwmulu_v4i64_v4i32_v4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vzext.vf4 v11, v8 ; CHECK-NEXT: vwmulu.vv v8, v10, v11 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll index 382f00913cb41..36af235446425 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub-mask.ll @@ -41,8 +41,8 @@ define <8 x i64> @vwsubu_vv_mask_v8i32(<8 x i32> %x, <8 x i32> %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 42 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmslt.vx v0, v8, a0 ; CHECK-NEXT: vmv.v.i v12, 0 +; CHECK-NEXT: vmslt.vx v0, v8, a0 ; CHECK-NEXT: vmerge.vvm v8, v12, v8, v0 ; CHECK-NEXT: vwsubu.vv v12, v10, v8 ; CHECK-NEXT: vmv4r.v v8, v12 @@ -60,8 +60,8 @@ define <8 x i64> @vwsub_wv_mask_v8i32_nonzero(<8 x i32> %x, <8 x i64> %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: li a0, 42 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-NEXT: vmslt.vx v0, v8, a0 ; CHECK-NEXT: vmv.v.i v10, 1 +; CHECK-NEXT: vmslt.vx v0, v8, a0 ; CHECK-NEXT: vmerge.vvm v16, v10, v8, v0 ; CHECK-NEXT: vwsub.wv v8, v12, v16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll index 7a925165d9816..5d3e39f96d567 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsub.ll @@ -418,8 +418,8 @@ define <4 x i64> @vwsub_v4i64_v4i32_v4i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwsub_v4i64_v4i32_v4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, 
ta, ma -; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vsext.vf4 v11, v8 ; CHECK-NEXT: vwsub.vv v8, v10, v11 ; CHECK-NEXT: ret @@ -677,10 +677,10 @@ define <16 x i64> @vwsub_vx_v16i64(ptr %x, i32 %y) { define <8 x i16> @vwsub_vx_v8i16_i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwsub_vx_v8i16_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lb a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: lb a0, 0(a1) +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vwsub.vv v8, v10, v9 ; CHECK-NEXT: ret %a = load <8 x i8>, ptr %x @@ -696,10 +696,10 @@ define <8 x i16> @vwsub_vx_v8i16_i8(ptr %x, ptr %y) { define <8 x i16> @vwsub_vx_v8i16_i16(ptr %x, ptr %y) { ; CHECK-LABEL: vwsub_vx_v8i16_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lh a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: lh a0, 0(a1) +; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret @@ -715,10 +715,10 @@ define <8 x i16> @vwsub_vx_v8i16_i16(ptr %x, ptr %y) { define <4 x i32> @vwsub_vx_v4i32_i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwsub_vx_v4i32_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lb a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: lb a0, 0(a1) +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vwsub.vv v8, v10, v9 ; CHECK-NEXT: ret %a = load <4 x i16>, ptr %x @@ -734,10 +734,10 @@ define <4 x i32> @vwsub_vx_v4i32_i8(ptr %x, ptr %y) { define <4 x i32> @vwsub_vx_v4i32_i16(ptr %x, ptr %y) { ; CHECK-LABEL: vwsub_vx_v4i32_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lh a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: lh a0, 0(a1) +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vwsub.vv v8, v10, v9 ; CHECK-NEXT: ret %a = load <4 x i16>, ptr %x @@ -753,10 +753,10 @@ define <4 x i32> @vwsub_vx_v4i32_i16(ptr %x, ptr %y) { define <4 x i32> @vwsub_vx_v4i32_i32(ptr %x, ptr %y) { ; CHECK-LABEL: vwsub_vx_v4i32_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lw a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: lw a0, 0(a1) +; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwsub.wv v8, v8, v9 ; CHECK-NEXT: ret @@ -772,10 +772,10 @@ define <4 x i32> @vwsub_vx_v4i32_i32(ptr %x, ptr %y) { define <2 x i64> @vwsub_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; CHECK-LABEL: vwsub_vx_v2i64_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lb a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: lb a0, 0(a1) +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vwsub.vv v8, v10, v9 ; CHECK-NEXT: ret %a = load <2 x i32>, ptr %x @@ -791,10 +791,10 @@ define <2 x i64> @vwsub_vx_v2i64_i8(ptr %x, ptr %y) nounwind { define <2 x i64> @vwsub_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; CHECK-LABEL: vwsub_vx_v2i64_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lh a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: lh a0, 0(a1) +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vwsub.vv v8, v10, v9 ; CHECK-NEXT: ret %a = load <2 x i32>, ptr %x @@ -810,10 +810,10 @@ define <2 x i64> @vwsub_vx_v2i64_i16(ptr %x, ptr 
%y) nounwind { define <2 x i64> @vwsub_vx_v2i64_i32(ptr %x, ptr %y) nounwind { ; CHECK-LABEL: vwsub_vx_v2i64_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lw a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: lw a0, 0(a1) +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vwsub.vv v8, v10, v9 ; CHECK-NEXT: ret %a = load <2 x i32>, ptr %x @@ -830,11 +830,11 @@ define <2 x i64> @vwsub_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsub_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: lw a2, 0(a1) -; RV32-NEXT: lw a1, 4(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lw a0, 0(a1) +; RV32-NEXT: lw a1, 4(a1) +; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -844,10 +844,10 @@ define <2 x i64> @vwsub_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsub_vx_v2i64_i64: ; RV64: # %bb.0: -; RV64-NEXT: ld a1, 0(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: ld a0, 0(a1) +; RV64-NEXT: vmv.v.x v8, a0 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vwsub.wv v8, v8, v9 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll index 4c08a8c15a388..bbe1ba03bdb6d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsubu.ll @@ -418,8 +418,8 @@ define <4 x i64> @vwsubu_v4i64_v4i32_v4i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwsubu_v4i64_v4i32_v4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vle32.v v10, (a0) +; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vzext.vf4 v11, v8 ; CHECK-NEXT: vwsubu.vv v8, v10, v11 ; CHECK-NEXT: ret @@ -677,10 +677,10 @@ define <16 x i64> @vwsubu_vx_v16i64(ptr %x, i32 %y) { define <8 x i16> @vwsubu_vx_v8i16_i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwsubu_vx_v8i16_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lbu a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: lbu a0, 0(a1) +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vwsubu.vv v8, v10, v9 ; CHECK-NEXT: ret %a = load <8 x i8>, ptr %x @@ -696,10 +696,10 @@ define <8 x i16> @vwsubu_vx_v8i16_i8(ptr %x, ptr %y) { define <8 x i16> @vwsubu_vx_v8i16_i16(ptr %x, ptr %y) { ; CHECK-LABEL: vwsubu_vx_v8i16_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lh a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-NEXT: vle8.v v9, (a0) -; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: lh a0, 0(a1) +; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, ma ; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret @@ -715,10 +715,10 @@ define <8 x i16> @vwsubu_vx_v8i16_i16(ptr %x, ptr %y) { define <4 x i32> @vwsubu_vx_v4i32_i8(ptr %x, ptr %y) { ; CHECK-LABEL: vwsubu_vx_v4i32_i8: ; CHECK: # %bb.0: -; CHECK-NEXT: lbu a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: lbu a0, 0(a1) +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vwsubu.vv v8, v10, v9 ; CHECK-NEXT: ret %a = load <4 x i16>, ptr %x @@ -734,10 +734,10 @@ define <4 x i32> @vwsubu_vx_v4i32_i8(ptr %x, ptr %y) { define <4 x i32> @vwsubu_vx_v4i32_i16(ptr %x, ptr %y) { ; CHECK-LABEL: 
vwsubu_vx_v4i32_i16: ; CHECK: # %bb.0: -; CHECK-NEXT: lhu a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vmv.v.x v10, a1 +; CHECK-NEXT: lhu a0, 0(a1) +; CHECK-NEXT: vmv.v.x v10, a0 ; CHECK-NEXT: vwsubu.vv v8, v10, v9 ; CHECK-NEXT: ret %a = load <4 x i16>, ptr %x @@ -753,10 +753,10 @@ define <4 x i32> @vwsubu_vx_v4i32_i16(ptr %x, ptr %y) { define <4 x i32> @vwsubu_vx_v4i32_i32(ptr %x, ptr %y) { ; CHECK-LABEL: vwsubu_vx_v4i32_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: lw a1, 0(a1) ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vle16.v v9, (a0) -; CHECK-NEXT: vmv.v.x v8, a1 +; CHECK-NEXT: lw a0, 0(a1) +; CHECK-NEXT: vmv.v.x v8, a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vwsubu.wv v8, v8, v9 ; CHECK-NEXT: ret @@ -786,10 +786,10 @@ define <2 x i64> @vwsubu_vx_v2i64_i8(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsubu_vx_v2i64_i8: ; RV64: # %bb.0: -; RV64-NEXT: lbu a1, 0(a1) ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: lbu a0, 0(a1) +; RV64-NEXT: vmv.v.x v10, a0 ; RV64-NEXT: vwsubu.vv v8, v10, v9 ; RV64-NEXT: ret %a = load <2 x i32>, ptr %x @@ -819,10 +819,10 @@ define <2 x i64> @vwsubu_vx_v2i64_i16(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsubu_vx_v2i64_i16: ; RV64: # %bb.0: -; RV64-NEXT: lhu a1, 0(a1) ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: lhu a0, 0(a1) +; RV64-NEXT: vmv.v.x v10, a0 ; RV64-NEXT: vwsubu.vv v8, v10, v9 ; RV64-NEXT: ret %a = load <2 x i32>, ptr %x @@ -852,10 +852,10 @@ define <2 x i64> @vwsubu_vx_v2i64_i32(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsubu_vx_v2i64_i32: ; RV64: # %bb.0: -; RV64-NEXT: lwu a1, 0(a1) ; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vmv.v.x v10, a1 +; RV64-NEXT: lwu a0, 0(a1) +; RV64-NEXT: vmv.v.x v10, a0 ; RV64-NEXT: vwsubu.vv v8, v10, v9 ; RV64-NEXT: ret %a = load <2 x i32>, ptr %x @@ -872,11 +872,11 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; RV32-LABEL: vwsubu_vx_v2i64_i64: ; RV32: # %bb.0: ; RV32-NEXT: addi sp, sp, -16 -; RV32-NEXT: lw a2, 0(a1) -; RV32-NEXT: lw a1, 4(a1) ; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vle32.v v9, (a0) -; RV32-NEXT: sw a2, 8(sp) +; RV32-NEXT: lw a0, 0(a1) +; RV32-NEXT: lw a1, 4(a1) +; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 ; RV32-NEXT: vlse64.v v8, (a0), zero @@ -886,10 +886,10 @@ define <2 x i64> @vwsubu_vx_v2i64_i64(ptr %x, ptr %y) nounwind { ; ; RV64-LABEL: vwsubu_vx_v2i64_i64: ; RV64: # %bb.0: -; RV64-NEXT: ld a1, 0(a1) ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle32.v v9, (a0) -; RV64-NEXT: vmv.v.x v8, a1 +; RV64-NEXT: ld a0, 0(a1) +; RV64-NEXT: vmv.v.x v8, a0 ; RV64-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; RV64-NEXT: vwsubu.wv v8, v8, v9 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll index df90dae379c06..b38701ebd3448 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-zext-vp.ll @@ -151,9 +151,9 @@ declare <32 x i64> @llvm.vp.zext.v32i64.v32i32(<32 x i32>, <32 x i1>, i32) define <32 x i64> @vzext_v32i64_v32i32(<32 x i32> %va, <32 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vzext_v32i64_v32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: vsetivli 
zero, 2, e8, mf4, ta, ma ; CHECK-NEXT: vslidedown.vi v16, v0, 2 +; CHECK-NEXT: li a2, 16 ; CHECK-NEXT: mv a1, a0 ; CHECK-NEXT: bltu a0, a2, .LBB12_2 ; CHECK-NEXT: # %bb.1: diff --git a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll index b7661bd826fed..ad973b72b271f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll +++ b/llvm/test/CodeGen/RISCV/rvv/float-round-conv.ll @@ -405,8 +405,8 @@ define @ceil_nxv1f32_to_si8( %x) { ; RV32-NEXT: vfabs.v v9, v8 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -424,8 +424,8 @@ define @ceil_nxv1f32_to_si8( %x) { ; RV64-NEXT: vfabs.v v9, v8 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -448,8 +448,8 @@ define @ceil_nxv1f32_to_ui8( %x) { ; RV32-NEXT: vfabs.v v9, v8 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v9, fa5 ; RV32-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -467,8 +467,8 @@ define @ceil_nxv1f32_to_ui8( %x) { ; RV64-NEXT: vfabs.v v9, v8 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v9, fa5 ; RV64-NEXT: vfcvt.x.f.v v9, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -631,8 +631,8 @@ define @ceil_nxv4f32_to_si8( %x) { ; RV32-NEXT: vfabs.v v10, v8 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v10, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v10, fa5 ; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -650,8 +650,8 @@ define @ceil_nxv4f32_to_si8( %x) { ; RV64-NEXT: vfabs.v v10, v8 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v10, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v10, fa5 ; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -674,8 +674,8 @@ define @ceil_nxv4f32_to_ui8( %x) { ; RV32-NEXT: vfabs.v v10, v8 ; RV32-NEXT: lui a0, 307200 ; RV32-NEXT: fmv.w.x fa5, a0 -; RV32-NEXT: vmflt.vf v0, v10, fa5 ; RV32-NEXT: fsrmi a0, 3 +; RV32-NEXT: vmflt.vf v0, v10, fa5 ; RV32-NEXT: vfcvt.x.f.v v10, v8, v0.t ; RV32-NEXT: fsrm a0 ; RV32-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -693,8 +693,8 @@ define @ceil_nxv4f32_to_ui8( %x) { ; RV64-NEXT: vfabs.v v10, v8 ; RV64-NEXT: lui a0, 307200 ; RV64-NEXT: fmv.w.x fa5, a0 -; RV64-NEXT: vmflt.vf v0, v10, fa5 ; RV64-NEXT: fsrmi a0, 3 +; RV64-NEXT: vmflt.vf v0, v10, fa5 ; RV64-NEXT: vfcvt.x.f.v v10, v8, v0.t ; RV64-NEXT: fsrm a0 ; RV64-NEXT: vfcvt.f.x.v v10, v10, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll index f9b5095c9af1d..c8b5487b3aee6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/floor-vp.ll @@ -22,12 +22,12 @@ define @vp_floor_nxv1bf16( %va, @vp_floor_nxv1bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; 
CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -76,12 +76,12 @@ define @vp_floor_nxv2bf16( %va, @vp_floor_nxv2bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -130,12 +130,12 @@ define @vp_floor_nxv4bf16( %va, @vp_floor_nxv4bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -184,12 +184,12 @@ define @vp_floor_nxv8bf16( %va, @vp_floor_nxv8bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -238,12 +238,12 @@ define @vp_floor_nxv16bf16( %va, @vp_floor_nxv16bf16_unmasked( @vp_floor_nxv32bf16( %va, @vp_floor_nxv32bf16( %va, @vp_floor_nxv32bf16( %va, @vp_floor_nxv32bf16_unmasked( @vp_floor_nxv32bf16_unmasked( @vp_floor_nxv32bf16_unmasked( @llvm.vp.floor.nxv1f16(, @vp_floor_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a0) +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -461,12 +461,12 @@ define @vp_floor_nxv1f16( %va, @vp_floor_nxv1f16( %va, @vp_floor_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -503,11 +503,11 @@ define @vp_floor_nxv1f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 
; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -525,13 +525,13 @@ declare @llvm.vp.floor.nxv2f16(, @vp_floor_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI14_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a0) +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -547,12 +547,12 @@ define @vp_floor_nxv2f16( %va, @vp_floor_nxv2f16( %va, @vp_floor_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI15_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -589,11 +589,11 @@ define @vp_floor_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -611,13 +611,13 @@ declare @llvm.vp.floor.nxv4f16(, @vp_floor_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a0) +; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 2 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -633,12 +633,12 @@ define @vp_floor_nxv4f16( %va, @vp_floor_nxv4f16( %va, @vp_floor_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -675,11 +675,11 @@ define @vp_floor_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: 
vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -699,12 +699,12 @@ define @vp_floor_nxv8f16( %va, @vp_floor_nxv8f16( %va, @vp_floor_nxv8f16( %va, @vp_floor_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI19_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -763,11 +763,11 @@ define @vp_floor_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -787,12 +787,12 @@ define @vp_floor_nxv16f16( %va, @vp_floor_nxv16f16( %va, @vp_floor_nxv16f16( %va, @vp_floor_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -851,11 +851,11 @@ define @vp_floor_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -875,12 +875,12 @@ define @vp_floor_nxv32f16( %va, @vp_floor_nxv32f16( %va, @vp_floor_nxv32f16( %va, @vp_floor_nxv32f16( %va, @vp_floor_nxv32f16( %va, @vp_floor_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_floor_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a0) ; ZVFH-NEXT: fsrmi a0, 2 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -995,11 +995,12 @@ define @vp_floor_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a1, zero, 
e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 +; ZVFHMIN-NEXT: fsrmi a4, 2 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: vmv1r.v v17, v16 @@ -1014,11 +1015,10 @@ define @vp_floor_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a2, 2 ; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t -; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: fsrm a4 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t @@ -1033,10 +1033,10 @@ define @vp_floor_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 2 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1064,9 +1064,9 @@ define @vp_floor_nxv1f32( %va, @vp_floor_nxv1f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1106,9 +1106,9 @@ define @vp_floor_nxv2f32( %va, @vp_floor_nxv2f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1149,9 +1149,9 @@ define @vp_floor_nxv4f32( %va, @vp_floor_nxv4f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1193,9 +1193,9 @@ define @vp_floor_nxv8f32( %va, @vp_floor_nxv8f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1237,9 +1237,9 @@ define @vp_floor_nxv16f32( %va, @vp_floor_nxv16f32_unmasked( % ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1276,13 +1276,13 @@ declare @llvm.vp.floor.nxv1f64(, @vp_floor_nxv1f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) 
-; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a0) +; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 2 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -1297,12 +1297,12 @@ define @vp_floor_nxv1f64( %va, @vp_floor_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1320,12 +1320,12 @@ define @vp_floor_nxv2f64( %va, @vp_floor_nxv2f64( %va, @vp_floor_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1364,12 +1364,12 @@ define @vp_floor_nxv4f64( %va, @vp_floor_nxv4f64( %va, @vp_floor_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1408,12 +1408,12 @@ define @vp_floor_nxv7f64( %va, @vp_floor_nxv7f64( %va, @vp_floor_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1452,12 +1452,12 @@ define @vp_floor_nxv8f64( %va, @vp_floor_nxv8f64( %va, @vp_floor_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_floor_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a0) ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; 
CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1498,59 +1498,66 @@ define @vp_floor_nxv16f64( %va, @vp_floor_nxv16f64_unmasked( ; CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 +; CHECK-NEXT: fsrmi a3, 2 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a2, 2 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a3 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t @@ -1585,8 +1592,8 @@ define @vp_floor_nxv16f64_unmasked( ; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 2 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll index 7fad68dbfbbda..42903f0d85e32 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fmaximum-sdnode.ll @@ -22,16 +22,14 @@ define @vfmax_nxv1bf16_vv( %a, @vfmax_nxv2bf16_vv( %a, @vfmax_nxv4bf16_vv( %a, @vfmax_nxv8bf16_vv( %a, @vfmax_nxv1f16_vv( %a, @vfmax_nxv2f16_vv( %a, @vfmax_nxv4f16_vv( %a, @vfmax_nxv8f16_vv( %a, @vfmax_nxv1f16_vv_nnana( %a, @vfmax_nxv1f16_vv_nnanb( %a, @vfmax_vv_nxv1bf16_unmasked( % ; CHECK-LABEL: vfmax_vv_nxv1bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmax.vv v9, v8, v9 +; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 +; CHECK-NEXT: vfmax.vv v9, v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret @@ -87,16 +85,14 @@ define @vfmax_vv_nxv2bf16_unmasked( % ; CHECK-LABEL: vfmax_vv_nxv2bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmax.vv v9, v8, v9 +; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 +; CHECK-NEXT: vfmax.vv v9, v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret @@ -134,15 +130,13 @@ define @vfmax_vv_nxv4bf16_unmasked( % ; CHECK-LABEL: vfmax_vv_nxv4bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v 
v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0 ; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0 +; CHECK-NEXT: vmerge.vvm v8, v12, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 ; CHECK-NEXT: vfmax.vv v10, v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 @@ -181,15 +175,13 @@ define @vfmax_vv_nxv8bf16_unmasked( % ; CHECK-LABEL: vfmax_vv_nxv8bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0 +; CHECK-NEXT: vmerge.vvm v8, v16, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0 ; CHECK-NEXT: vfmax.vv v12, v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 @@ -633,16 +625,14 @@ define @vfmax_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmax_vv_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -701,16 +691,14 @@ define @vfmax_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmax_vv_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmax.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmax.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -771,15 +759,13 @@ define 
@vfmax_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmax_vv_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmax.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 @@ -843,15 +829,13 @@ define @vfmax_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmax_vv_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmax.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 @@ -1615,8 +1599,6 @@ define @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64( %va, @vfmax_vv_nxv16f64_unmasked( ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 @@ -1726,45 +1702,36 @@ define @vfmax_vv_nxv16f64_unmasked( ; CHECK-NEXT: and a3, a3, a4 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmax.vv v8, v16, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vfmax.vv v16, v16, v8 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v16, 
(a3) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: bltu a2, a1, .LBB41_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB41_2: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v7, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vfmax.vv v8, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: vfmax.vv v8, v8, v24 -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll index 8cae0bbc03c8e..3dc02bb4a5a11 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fminimum-sdnode.ll @@ -22,16 +22,14 @@ define @vfmin_nxv1bf16_vv( %a, @vfmin_nxv2bf16_vv( %a, @vfmin_nxv4bf16_vv( %a, @vfmin_nxv8bf16_vv( %a, @vfmin_nxv1f16_vv( %a, @vfmin_nxv2f16_vv( %a, @vfmin_nxv4f16_vv( %a, @vfmin_nxv8f16_vv( %a, @vfmin_nxv1f16_vv_nnana( %a, @vfmin_nxv1f16_vv_nnanb( %a, @vfmin_vv_nxv1bf16_unmasked( % ; CHECK-LABEL: vfmin_vv_nxv1bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0 -; CHECK-NEXT: vfmin.vv v9, v8, v9 +; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 +; CHECK-NEXT: vfmin.vv v9, v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret @@ -87,16 +85,14 @@ define @vfmin_vv_nxv2bf16_unmasked( % ; CHECK-LABEL: vfmin_vv_nxv2bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; CHECK-NEXT: vmfeq.vv v0, v9, v9 +; CHECK-NEXT: vmerge.vvm v8, v9, v10, v0 ; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9 -; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0 -; CHECK-NEXT: vmfeq.vv v0, v8, v8 -; CHECK-NEXT: vmerge.vvm v8, v8, 
v10, v0 -; CHECK-NEXT: vfmin.vv v9, v8, v9 +; CHECK-NEXT: vmerge.vvm v9, v10, v9, v0 +; CHECK-NEXT: vfmin.vv v9, v9, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 ; CHECK-NEXT: ret @@ -134,15 +130,13 @@ define @vfmin_vv_nxv4bf16_unmasked( % ; CHECK-LABEL: vfmin_vv_nxv4bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v10, v10 -; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v9 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0 ; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0 +; CHECK-NEXT: vmerge.vvm v8, v12, v10, v0 +; CHECK-NEXT: vmfeq.vv v0, v10, v10 +; CHECK-NEXT: vmerge.vvm v10, v10, v12, v0 ; CHECK-NEXT: vfmin.vv v10, v10, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 @@ -181,15 +175,13 @@ define @vfmin_vv_nxv8bf16_unmasked( % ; CHECK-LABEL: vfmin_vv_nxv8bf16_unmasked: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 +; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v10 +; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v12, v12 -; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10 -; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0 ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0 +; CHECK-NEXT: vmerge.vvm v8, v16, v12, v0 +; CHECK-NEXT: vmfeq.vv v0, v12, v12 +; CHECK-NEXT: vmerge.vvm v12, v12, v16, v0 ; CHECK-NEXT: vfmin.vv v12, v12, v8 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 @@ -633,16 +625,14 @@ define @vfmin_vv_nxv1f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmin_vv_nxv1f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -701,16 +691,14 @@ define @vfmin_vv_nxv2f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmin_vv_nxv2f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; ZVFHMIN-NEXT: vmfeq.vv v0, v9, v9 +; ZVFHMIN-NEXT: vmerge.vvm v8, v9, v10, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v8, v9 -; ZVFHMIN-NEXT: vsetvli zero, 
zero, e32, m1, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v8, v0 -; ZVFHMIN-NEXT: vmfeq.vv v0, v8, v8 -; ZVFHMIN-NEXT: vmerge.vvm v8, v8, v10, v0 -; ZVFHMIN-NEXT: vfmin.vv v9, v8, v9 +; ZVFHMIN-NEXT: vmerge.vvm v9, v10, v9, v0 +; ZVFHMIN-NEXT: vfmin.vv v9, v9, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 ; ZVFHMIN-NEXT: ret @@ -771,15 +759,13 @@ define @vfmin_vv_nxv4f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmin_vv_nxv4f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v9 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v10, v12, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vmerge.vvm v10, v12, v10, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v10, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v10, v10 +; ZVFHMIN-NEXT: vmerge.vvm v10, v10, v12, v0 ; ZVFHMIN-NEXT: vfmin.vv v10, v10, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 @@ -843,15 +829,13 @@ define @vfmin_vv_nxv8f16_unmasked( %va, < ; ZVFHMIN-LABEL: vfmin_vv_nxv8f16_unmasked: ; ZVFHMIN: # %bb.0: ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v10 +; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 -; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v10 -; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma -; ZVFHMIN-NEXT: vmerge.vvm v8, v12, v16, v0 ; ZVFHMIN-NEXT: vmfeq.vv v0, v16, v16 -; ZVFHMIN-NEXT: vmerge.vvm v12, v16, v12, v0 +; ZVFHMIN-NEXT: vmerge.vvm v8, v16, v12, v0 +; ZVFHMIN-NEXT: vmfeq.vv v0, v12, v12 +; ZVFHMIN-NEXT: vmerge.vvm v12, v12, v16, v0 ; ZVFHMIN-NEXT: vfmin.vv v12, v12, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 @@ -1615,8 +1599,6 @@ define @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64( %va, @vfmin_vv_nxv16f64_unmasked( ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: sub sp, sp, a1 +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 @@ -1726,45 +1702,36 @@ define @vfmin_vv_nxv16f64_unmasked( ; CHECK-NEXT: and a3, a3, a4 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vmfeq.vv v0, v16, v16 -; CHECK-NEXT: vmfeq.vv v7, v24, v24 -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vmerge.vvm v8, v16, v24, v0 -; CHECK-NEXT: 
vmv1r.v v0, v7 +; CHECK-NEXT: vmfeq.vv v0, v24, v24 ; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 -; CHECK-NEXT: vfmin.vv v8, v16, v8 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: vfmin.vv v16, v16, v8 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: bltu a2, a1, .LBB41_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB41_2: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vmfeq.vv v0, v16, v16 +; CHECK-NEXT: vmfeq.vv v0, v8, v8 +; CHECK-NEXT: vmfeq.vv v7, v24, v24 +; CHECK-NEXT: vmerge.vvm v16, v8, v24, v0 +; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 +; CHECK-NEXT: vfmin.vv v8, v8, v16 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmfeq.vv v7, v8, v8 -; CHECK-NEXT: vmerge.vvm v24, v16, v8, v0 -; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: vmerge.vvm v8, v8, v16, v0 -; CHECK-NEXT: vfmin.vv v8, v8, v24 -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll index 7a4695d1c25c1..3276c68b9b6ea 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-constrained-sdnode.ll @@ -12,17 +12,17 @@ define @nearbyint_nxv1f16( %v) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -36,17 +36,17 @@ define @nearbyint_nxv2f16( %v) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; 
CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv2f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -60,17 +60,17 @@ define @nearbyint_nxv4f16( %v) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -84,17 +84,17 @@ define @nearbyint_nxv8f16( %v) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -108,17 +108,17 @@ define @nearbyint_nxv16f16( %v) strictf ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv16f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -132,17 +132,17 @@ define @nearbyint_nxv32f16( %v) strictf ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call 
@llvm.experimental.constrained.nearbyint.nxv32f16( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -158,15 +158,15 @@ define @nearbyint_nxv1f32( %v) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -182,15 +182,15 @@ define @nearbyint_nxv2f32( %v) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv2f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -206,15 +206,15 @@ define @nearbyint_nxv4f32( %v) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -230,15 +230,15 @@ define @nearbyint_nxv8f32( %v) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f32( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -254,15 +254,15 @@ define @nearbyint_nxv16f32( %v) stric ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv16f32( %v, metadata 
!"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -276,17 +276,17 @@ define @nearbyint_nxv1f64( %v) strict ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv1f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -300,17 +300,17 @@ define @nearbyint_nxv2f64( %v) strict ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv2f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -324,17 +324,17 @@ define @nearbyint_nxv4f64( %v) strict ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv4f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r @@ -348,17 +348,17 @@ define @nearbyint_nxv8f64( %v) strict ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %r = call @llvm.experimental.constrained.nearbyint.nxv8f64( %v, metadata !"round.dynamic", metadata !"fpexcept.strict") ret %r diff --git a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll index 
4ea3269cec0b1..78760234fa493 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fnearbyint-sdnode.ll @@ -18,18 +18,18 @@ define @nearbyint_nxv1bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv1bf16( %x) ret %a @@ -41,18 +41,18 @@ define @nearbyint_nxv2bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9 -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv2bf16( %x) ret %a @@ -64,18 +64,18 @@ define @nearbyint_nxv4bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v8, v10, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10 -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv4bf16( %x) ret %a @@ -87,18 +87,18 @@ define @nearbyint_nxv8bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v8, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12 -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv8bf16( %x) ret %a @@ -110,18 +110,18 @@ define @nearbyint_nxv16bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, 
v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16 -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv16bf16( %x) ret %a @@ -133,11 +133,11 @@ define @nearbyint_nxv32bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -154,11 +154,11 @@ define @nearbyint_nxv32bf16( %x) { ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v12, v24 -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv32bf16( %x) ret %a @@ -167,17 +167,17 @@ define @nearbyint_nxv32bf16( %x) { define @nearbyint_nxv1f16( %x) { ; ZVFH-LABEL: nearbyint_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: nearbyint_nxv1f16: @@ -185,18 +185,18 @@ define @nearbyint_nxv1f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %a = call @llvm.nearbyint.nxv1f16( %x) ret %a @@ -206,17 +206,17 @@ declare @llvm.nearbyint.nxv1f16() define @nearbyint_nxv2f16( %x) { ; ZVFH-LABEL: nearbyint_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; 
ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: nearbyint_nxv2f16: @@ -224,18 +224,18 @@ define @nearbyint_nxv2f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %a = call @llvm.nearbyint.nxv2f16( %x) ret %a @@ -245,17 +245,17 @@ declare @llvm.nearbyint.nxv2f16() define @nearbyint_nxv4f16( %x) { ; ZVFH-LABEL: nearbyint_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: nearbyint_nxv4f16: @@ -263,18 +263,18 @@ define @nearbyint_nxv4f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %a = call @llvm.nearbyint.nxv4f16( %x) ret %a @@ -284,17 +284,17 @@ declare @llvm.nearbyint.nxv4f16() define @nearbyint_nxv8f16( %x) { ; ZVFH-LABEL: nearbyint_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: nearbyint_nxv8f16: @@ -302,18 +302,18 @@ define @nearbyint_nxv8f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: frflags a0 ; 
ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %a = call @llvm.nearbyint.nxv8f16( %x) ret %a @@ -323,17 +323,17 @@ declare @llvm.nearbyint.nxv8f16() define @nearbyint_nxv16f16( %x) { ; ZVFH-LABEL: nearbyint_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: nearbyint_nxv16f16: @@ -341,18 +341,18 @@ define @nearbyint_nxv16f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %a = call @llvm.nearbyint.nxv16f16( %x) ret %a @@ -362,17 +362,17 @@ declare @llvm.nearbyint.nxv16f16() define @nearbyint_nxv32f16( %x) { ; ZVFH-LABEL: nearbyint_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: nearbyint_nxv32f16: @@ -380,11 +380,11 @@ define @nearbyint_nxv32f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu @@ -401,11 +401,11 @@ define @nearbyint_nxv32f16( %x) { ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: 
vfcvt.x.f.v v16, v24, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v12, v24 -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %a = call @llvm.nearbyint.nxv32f16( %x) ret %a @@ -419,13 +419,13 @@ define @nearbyint_nxv1f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv1f32( %x) ret %a @@ -439,13 +439,13 @@ define @nearbyint_nxv2f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv2f32( %x) ret %a @@ -459,13 +459,13 @@ define @nearbyint_nxv4f32( %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv4f32( %x) ret %a @@ -479,13 +479,13 @@ define @nearbyint_nxv8f32( %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv8f32( %x) ret %a @@ -499,13 +499,13 @@ define @nearbyint_nxv16f32( %x) { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv16f32( %x) ret %a @@ -515,17 +515,17 @@ declare @llvm.nearbyint.nxv16f32() define @nearbyint_nxv1f64( %x) { ; CHECK-LABEL: nearbyint_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; 
CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv1f64( %x) ret %a @@ -535,17 +535,17 @@ declare @llvm.nearbyint.nxv1f64() define @nearbyint_nxv2f64( %x) { ; CHECK-LABEL: nearbyint_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv2f64( %x) ret %a @@ -555,17 +555,17 @@ declare @llvm.nearbyint.nxv2f64() define @nearbyint_nxv4f64( %x) { ; CHECK-LABEL: nearbyint_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv4f64( %x) ret %a @@ -575,17 +575,17 @@ declare @llvm.nearbyint.nxv4f64() define @nearbyint_nxv8f64( %x) { ; CHECK-LABEL: nearbyint_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %a = call @llvm.nearbyint.nxv8f64( %x) ret %a diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll index 5e657a93ec0d6..a420e9ecee563 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-scalar-load-crash.ll @@ -7,10 +7,10 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) { ; RV32-LABEL: test: ; RV32: # %bb.0: # %entry -; RV32-NEXT: addi a3, a2, 1 -; RV32-NEXT: th.lbib a4, (a1), -1, 0 +; RV32-NEXT: th.lbib a3, (a1), -1, 0 ; RV32-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; RV32-NEXT: vmv.v.x v8, a4 +; RV32-NEXT: vmv.v.x v8, a3 +; RV32-NEXT: addi a3, a2, 1 ; RV32-NEXT: vmv.s.x v9, zero ; RV32-NEXT: vsetvli zero, a3, e8, mf2, tu, ma ; RV32-NEXT: vslideup.vx v8, v9, a2 @@ -33,10 +33,10 @@ define i32 @test(i32 %size, ptr %add.ptr, i64 %const) { ; ; RV64-LABEL: test: ; RV64: # %bb.0: # %entry -; RV64-NEXT: addi a3, a2, 1 -; RV64-NEXT: th.lbib a4, (a1), -1, 0 +; RV64-NEXT: th.lbib a3, (a1), -1, 0 ; RV64-NEXT: 
vsetivli zero, 8, e8, mf2, ta, ma -; RV64-NEXT: vmv.v.x v8, a4 +; RV64-NEXT: vmv.v.x v8, a3 +; RV64-NEXT: addi a3, a2, 1 ; RV64-NEXT: vmv.s.x v9, zero ; RV64-NEXT: vsetvli zero, a3, e8, mf2, tu, ma ; RV64-NEXT: vslideup.vx v8, v9, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll index e24b23c9b2d32..7504c570e6c7a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fold-vector-cmp.ll @@ -15,11 +15,11 @@ define i32 @test(i32 %call.i) { ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-V-NEXT: vmv.v.x v8, a0 ; CHECK-V-NEXT: lui a0, 524288 +; CHECK-V-NEXT: vmv.v.i v9, 0 ; CHECK-V-NEXT: vslide1down.vx v8, v8, a0 ; CHECK-V-NEXT: addi a0, a0, 2 ; CHECK-V-NEXT: vmslt.vx v0, v8, a0 -; CHECK-V-NEXT: vmv.v.i v8, 0 -; CHECK-V-NEXT: vmerge.vim v8, v8, 1, v0 +; CHECK-V-NEXT: vmerge.vim v8, v9, 1, v0 ; CHECK-V-NEXT: vslidedown.vi v8, v8, 1 ; CHECK-V-NEXT: vmv.x.s a0, v8 ; CHECK-V-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll index f6598606b09f1..052a10e0adcdc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fpclamptosat_vec.ll @@ -9,10 +9,10 @@ define <2 x i32> @stest_f64i32(<2 x double> %x) { ; CHECK-NOV-LABEL: stest_f64i32: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz ; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz ; CHECK-NOV-NEXT: lui a2, 524288 ; CHECK-NOV-NEXT: addiw a3, a2, -1 -; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz ; CHECK-NOV-NEXT: bge a1, a3, .LBB0_5 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: bge a0, a3, .LBB0_6 @@ -55,10 +55,10 @@ entry: define <2 x i32> @utest_f64i32(<2 x double> %x) { ; CHECK-NOV-LABEL: utest_f64i32: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.lu.d a1, fa1, rtz ; CHECK-NOV-NEXT: fcvt.lu.d a0, fa0, rtz ; CHECK-NOV-NEXT: li a2, -1 ; CHECK-NOV-NEXT: srli a2, a2, 32 -; CHECK-NOV-NEXT: fcvt.lu.d a1, fa1, rtz ; CHECK-NOV-NEXT: bgeu a0, a2, .LBB1_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: bgeu a1, a2, .LBB1_4 @@ -89,10 +89,10 @@ entry: define <2 x i32> @ustest_f64i32(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i32: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz ; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz ; CHECK-NOV-NEXT: li a2, -1 ; CHECK-NOV-NEXT: srli a2, a2, 32 -; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz ; CHECK-NOV-NEXT: blt a1, a2, .LBB2_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: mv a1, a2 @@ -130,14 +130,14 @@ entry: define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-NOV-LABEL: stest_f32i32: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz +; CHECK-NOV-NEXT: fcvt.l.s a1, fa2, rtz +; CHECK-NOV-NEXT: fcvt.l.s a2, fa3, rtz ; CHECK-NOV-NEXT: lui a3, 524288 ; CHECK-NOV-NEXT: addiw a6, a3, -1 -; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz -; CHECK-NOV-NEXT: bge a1, a6, .LBB3_10 +; CHECK-NOV-NEXT: bge a2, a6, .LBB3_10 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz -; CHECK-NOV-NEXT: bge a2, a6, .LBB3_11 +; CHECK-NOV-NEXT: bge a1, a6, .LBB3_11 ; CHECK-NOV-NEXT: .LBB3_2: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz ; CHECK-NOV-NEXT: bge a4, a6, .LBB3_12 @@ -148,23 +148,23 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB3_5: # %entry ; CHECK-NOV-NEXT: bge a3, a4, .LBB3_15 ; CHECK-NOV-NEXT: .LBB3_6: # %entry -; CHECK-NOV-NEXT: bge a3, a2, .LBB3_16 +; CHECK-NOV-NEXT: bge a3, a1, .LBB3_16 
; CHECK-NOV-NEXT: .LBB3_7: # %entry -; CHECK-NOV-NEXT: blt a3, a1, .LBB3_9 +; CHECK-NOV-NEXT: blt a3, a2, .LBB3_9 ; CHECK-NOV-NEXT: .LBB3_8: # %entry -; CHECK-NOV-NEXT: lui a1, 524288 +; CHECK-NOV-NEXT: lui a2, 524288 ; CHECK-NOV-NEXT: .LBB3_9: # %entry ; CHECK-NOV-NEXT: sw a5, 0(a0) ; CHECK-NOV-NEXT: sw a4, 4(a0) -; CHECK-NOV-NEXT: sw a2, 8(a0) -; CHECK-NOV-NEXT: sw a1, 12(a0) +; CHECK-NOV-NEXT: sw a1, 8(a0) +; CHECK-NOV-NEXT: sw a2, 12(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB3_10: # %entry -; CHECK-NOV-NEXT: mv a1, a6 +; CHECK-NOV-NEXT: mv a2, a6 ; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz -; CHECK-NOV-NEXT: blt a2, a6, .LBB3_2 +; CHECK-NOV-NEXT: blt a1, a6, .LBB3_2 ; CHECK-NOV-NEXT: .LBB3_11: # %entry -; CHECK-NOV-NEXT: mv a2, a6 +; CHECK-NOV-NEXT: mv a1, a6 ; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz ; CHECK-NOV-NEXT: blt a4, a6, .LBB3_3 ; CHECK-NOV-NEXT: .LBB3_12: # %entry @@ -178,10 +178,10 @@ define <4 x i32> @stest_f32i32(<4 x float> %x) { ; CHECK-NOV-NEXT: blt a3, a4, .LBB3_6 ; CHECK-NOV-NEXT: .LBB3_15: # %entry ; CHECK-NOV-NEXT: lui a4, 524288 -; CHECK-NOV-NEXT: blt a3, a2, .LBB3_7 +; CHECK-NOV-NEXT: blt a3, a1, .LBB3_7 ; CHECK-NOV-NEXT: .LBB3_16: # %entry -; CHECK-NOV-NEXT: lui a2, 524288 -; CHECK-NOV-NEXT: bge a3, a1, .LBB3_8 +; CHECK-NOV-NEXT: lui a1, 524288 +; CHECK-NOV-NEXT: bge a3, a2, .LBB3_8 ; CHECK-NOV-NEXT: j .LBB3_9 ; ; CHECK-V-LABEL: stest_f32i32: @@ -203,14 +203,14 @@ entry: define <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-NOV-LABEL: utest_f32i32: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.lu.s a1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.lu.s a1, fa1, rtz +; CHECK-NOV-NEXT: fcvt.lu.s a2, fa0, rtz ; CHECK-NOV-NEXT: li a3, -1 ; CHECK-NOV-NEXT: srli a3, a3, 32 -; CHECK-NOV-NEXT: fcvt.lu.s a2, fa1, rtz -; CHECK-NOV-NEXT: bgeu a1, a3, .LBB4_6 +; CHECK-NOV-NEXT: bgeu a2, a3, .LBB4_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.lu.s a4, fa2, rtz -; CHECK-NOV-NEXT: bgeu a2, a3, .LBB4_7 +; CHECK-NOV-NEXT: bgeu a1, a3, .LBB4_7 ; CHECK-NOV-NEXT: .LBB4_2: # %entry ; CHECK-NOV-NEXT: fcvt.lu.s a5, fa3, rtz ; CHECK-NOV-NEXT: bgeu a4, a3, .LBB4_8 @@ -219,17 +219,17 @@ define <4 x i32> @utest_f32i32(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB4_4: # %entry ; CHECK-NOV-NEXT: mv a5, a3 ; CHECK-NOV-NEXT: .LBB4_5: # %entry -; CHECK-NOV-NEXT: sw a1, 0(a0) -; CHECK-NOV-NEXT: sw a2, 4(a0) +; CHECK-NOV-NEXT: sw a2, 0(a0) +; CHECK-NOV-NEXT: sw a1, 4(a0) ; CHECK-NOV-NEXT: sw a4, 8(a0) ; CHECK-NOV-NEXT: sw a5, 12(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB4_6: # %entry -; CHECK-NOV-NEXT: mv a1, a3 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: fcvt.lu.s a4, fa2, rtz -; CHECK-NOV-NEXT: bltu a2, a3, .LBB4_2 +; CHECK-NOV-NEXT: bltu a1, a3, .LBB4_2 ; CHECK-NOV-NEXT: .LBB4_7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv a1, a3 ; CHECK-NOV-NEXT: fcvt.lu.s a5, fa3, rtz ; CHECK-NOV-NEXT: bltu a4, a3, .LBB4_3 ; CHECK-NOV-NEXT: .LBB4_8: # %entry @@ -254,10 +254,10 @@ entry: define <4 x i32> @ustest_f32i32(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i32: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz ; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz ; CHECK-NOV-NEXT: li a4, -1 ; CHECK-NOV-NEXT: srli a4, a4, 32 -; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz ; CHECK-NOV-NEXT: bge a1, a4, .LBB5_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz @@ -341,12 +341,12 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 ; CHECK-NOV-NEXT: .cfi_remember_state 
+; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: lhu s1, 0(a1) ; CHECK-NOV-NEXT: lhu s2, 8(a1) -; CHECK-NOV-NEXT: lhu a2, 16(a1) +; CHECK-NOV-NEXT: lhu a0, 16(a1) ; CHECK-NOV-NEXT: lhu s3, 24(a1) -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 @@ -355,8 +355,8 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NOV-NEXT: lui a1, 524288 @@ -454,11 +454,11 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -473,11 +473,11 @@ define <4 x i32> @stest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -541,22 +541,22 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 ; CHECK-NOV-NEXT: .cfi_remember_state -; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu a2, 8(a1) -; CHECK-NOV-NEXT: lhu s2, 16(a1) -; CHECK-NOV-NEXT: lhu s3, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: lhu s2, 0(a1) +; CHECK-NOV-NEXT: lhu a0, 8(a1) +; CHECK-NOV-NEXT: lhu s1, 16(a1) +; CHECK-NOV-NEXT: lhu s3, 24(a1) +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: fcvt.lu.s s1, fs2, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-NOV-NEXT: li a1, -1 @@ -634,11 +634,11 @@ define <4 x i32> @utesth_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -653,11 +653,11 @@ define <4 x i32> @utesth_f16i32(<4 x half> 
%x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -719,12 +719,12 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 ; CHECK-NOV-NEXT: .cfi_remember_state +; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: lhu s1, 0(a1) ; CHECK-NOV-NEXT: lhu s2, 8(a1) -; CHECK-NOV-NEXT: lhu a2, 16(a1) +; CHECK-NOV-NEXT: lhu a0, 16(a1) ; CHECK-NOV-NEXT: lhu s3, 24(a1) -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 @@ -733,8 +733,8 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NOV-NEXT: li a2, -1 @@ -824,11 +824,11 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -843,11 +843,11 @@ define <4 x i32> @ustest_f16i32(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -895,10 +895,10 @@ entry: define <2 x i16> @stest_f64i16(<2 x double> %x) { ; CHECK-NOV-LABEL: stest_f64i16: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz ; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz ; CHECK-NOV-NEXT: lui a2, 8 ; CHECK-NOV-NEXT: addiw a2, a2, -1 -; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz ; CHECK-NOV-NEXT: bge a1, a2, .LBB9_5 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: bge a0, a2, .LBB9_6 @@ -943,10 +943,10 @@ entry: define <2 x i16> @utest_f64i16(<2 x double> %x) { ; CHECK-NOV-LABEL: utest_f64i16: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.wu.d a1, fa1, rtz ; CHECK-NOV-NEXT: fcvt.wu.d a0, fa0, rtz ; CHECK-NOV-NEXT: lui a2, 16 ; CHECK-NOV-NEXT: addiw a2, a2, -1 -; CHECK-NOV-NEXT: fcvt.wu.d a1, fa1, rtz ; CHECK-NOV-NEXT: bgeu a0, a2, .LBB10_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: bgeu a1, a2, .LBB10_4 @@ -977,10 +977,10 @@ entry: define <2 x i16> @ustest_f64i16(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i16: ; CHECK-NOV: # %bb.0: # 
%entry +; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz ; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz ; CHECK-NOV-NEXT: lui a2, 16 ; CHECK-NOV-NEXT: addiw a2, a2, -1 -; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz ; CHECK-NOV-NEXT: blt a1, a2, .LBB11_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: mv a1, a2 @@ -1018,14 +1018,14 @@ entry: define <4 x i16> @stest_f32i16(<4 x float> %x) { ; CHECK-NOV-LABEL: stest_f32i16: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz +; CHECK-NOV-NEXT: fcvt.w.s a1, fa2, rtz +; CHECK-NOV-NEXT: fcvt.w.s a2, fa3, rtz ; CHECK-NOV-NEXT: lui a5, 8 ; CHECK-NOV-NEXT: addiw a5, a5, -1 -; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz -; CHECK-NOV-NEXT: bge a1, a5, .LBB12_10 +; CHECK-NOV-NEXT: bge a2, a5, .LBB12_10 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz -; CHECK-NOV-NEXT: bge a2, a5, .LBB12_11 +; CHECK-NOV-NEXT: bge a1, a5, .LBB12_11 ; CHECK-NOV-NEXT: .LBB12_2: # %entry ; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz ; CHECK-NOV-NEXT: bge a3, a5, .LBB12_12 @@ -1037,23 +1037,23 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB12_5: # %entry ; CHECK-NOV-NEXT: bge a5, a3, .LBB12_15 ; CHECK-NOV-NEXT: .LBB12_6: # %entry -; CHECK-NOV-NEXT: bge a5, a2, .LBB12_16 +; CHECK-NOV-NEXT: bge a5, a1, .LBB12_16 ; CHECK-NOV-NEXT: .LBB12_7: # %entry -; CHECK-NOV-NEXT: blt a5, a1, .LBB12_9 +; CHECK-NOV-NEXT: blt a5, a2, .LBB12_9 ; CHECK-NOV-NEXT: .LBB12_8: # %entry -; CHECK-NOV-NEXT: lui a1, 1048568 +; CHECK-NOV-NEXT: lui a2, 1048568 ; CHECK-NOV-NEXT: .LBB12_9: # %entry ; CHECK-NOV-NEXT: sh a4, 0(a0) ; CHECK-NOV-NEXT: sh a3, 2(a0) -; CHECK-NOV-NEXT: sh a2, 4(a0) -; CHECK-NOV-NEXT: sh a1, 6(a0) +; CHECK-NOV-NEXT: sh a1, 4(a0) +; CHECK-NOV-NEXT: sh a2, 6(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB12_10: # %entry -; CHECK-NOV-NEXT: mv a1, a5 +; CHECK-NOV-NEXT: mv a2, a5 ; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz -; CHECK-NOV-NEXT: blt a2, a5, .LBB12_2 +; CHECK-NOV-NEXT: blt a1, a5, .LBB12_2 ; CHECK-NOV-NEXT: .LBB12_11: # %entry -; CHECK-NOV-NEXT: mv a2, a5 +; CHECK-NOV-NEXT: mv a1, a5 ; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz ; CHECK-NOV-NEXT: blt a3, a5, .LBB12_3 ; CHECK-NOV-NEXT: .LBB12_12: # %entry @@ -1068,10 +1068,10 @@ define <4 x i16> @stest_f32i16(<4 x float> %x) { ; CHECK-NOV-NEXT: blt a5, a3, .LBB12_6 ; CHECK-NOV-NEXT: .LBB12_15: # %entry ; CHECK-NOV-NEXT: lui a3, 1048568 -; CHECK-NOV-NEXT: blt a5, a2, .LBB12_7 +; CHECK-NOV-NEXT: blt a5, a1, .LBB12_7 ; CHECK-NOV-NEXT: .LBB12_16: # %entry -; CHECK-NOV-NEXT: lui a2, 1048568 -; CHECK-NOV-NEXT: bge a5, a1, .LBB12_8 +; CHECK-NOV-NEXT: lui a1, 1048568 +; CHECK-NOV-NEXT: bge a5, a2, .LBB12_8 ; CHECK-NOV-NEXT: j .LBB12_9 ; ; CHECK-V-LABEL: stest_f32i16: @@ -1094,14 +1094,14 @@ entry: define <4 x i16> @utest_f32i16(<4 x float> %x) { ; CHECK-NOV-LABEL: utest_f32i16: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.wu.s a1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.wu.s a1, fa1, rtz +; CHECK-NOV-NEXT: fcvt.wu.s a2, fa0, rtz ; CHECK-NOV-NEXT: lui a3, 16 ; CHECK-NOV-NEXT: addiw a3, a3, -1 -; CHECK-NOV-NEXT: fcvt.wu.s a2, fa1, rtz -; CHECK-NOV-NEXT: bgeu a1, a3, .LBB13_6 +; CHECK-NOV-NEXT: bgeu a2, a3, .LBB13_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.wu.s a4, fa2, rtz -; CHECK-NOV-NEXT: bgeu a2, a3, .LBB13_7 +; CHECK-NOV-NEXT: bgeu a1, a3, .LBB13_7 ; CHECK-NOV-NEXT: .LBB13_2: # %entry ; CHECK-NOV-NEXT: fcvt.wu.s a5, fa3, rtz ; CHECK-NOV-NEXT: bgeu a4, a3, .LBB13_8 @@ -1110,17 +1110,17 @@ define <4 x i16> @utest_f32i16(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB13_4: # %entry 
; CHECK-NOV-NEXT: mv a5, a3 ; CHECK-NOV-NEXT: .LBB13_5: # %entry -; CHECK-NOV-NEXT: sh a1, 0(a0) -; CHECK-NOV-NEXT: sh a2, 2(a0) +; CHECK-NOV-NEXT: sh a2, 0(a0) +; CHECK-NOV-NEXT: sh a1, 2(a0) ; CHECK-NOV-NEXT: sh a4, 4(a0) ; CHECK-NOV-NEXT: sh a5, 6(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB13_6: # %entry -; CHECK-NOV-NEXT: mv a1, a3 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: fcvt.wu.s a4, fa2, rtz -; CHECK-NOV-NEXT: bltu a2, a3, .LBB13_2 +; CHECK-NOV-NEXT: bltu a1, a3, .LBB13_2 ; CHECK-NOV-NEXT: .LBB13_7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv a1, a3 ; CHECK-NOV-NEXT: fcvt.wu.s a5, fa3, rtz ; CHECK-NOV-NEXT: bltu a4, a3, .LBB13_3 ; CHECK-NOV-NEXT: .LBB13_8: # %entry @@ -1146,10 +1146,10 @@ entry: define <4 x i16> @ustest_f32i16(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i16: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz ; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz ; CHECK-NOV-NEXT: lui a4, 16 ; CHECK-NOV-NEXT: addiw a4, a4, -1 -; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz ; CHECK-NOV-NEXT: bge a1, a4, .LBB14_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz @@ -1248,16 +1248,16 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 ; CHECK-NOV-NEXT: .cfi_remember_state +; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: lhu s1, 32(a1) ; CHECK-NOV-NEXT: lhu s2, 40(a1) -; CHECK-NOV-NEXT: lhu a2, 48(a1) +; CHECK-NOV-NEXT: lhu a0, 48(a1) ; CHECK-NOV-NEXT: lhu s3, 56(a1) ; CHECK-NOV-NEXT: lhu s4, 0(a1) ; CHECK-NOV-NEXT: lhu s5, 8(a1) ; CHECK-NOV-NEXT: lhu s6, 16(a1) ; CHECK-NOV-NEXT: lhu s7, 24(a1) -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 @@ -1278,8 +1278,8 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NOV-NEXT: lui a7, 8 @@ -1458,7 +1458,6 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -1466,6 +1465,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -1483,7 +1483,6 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -1491,6 +1490,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; 
CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -1515,11 +1515,11 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -1534,11 +1534,11 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -1632,16 +1632,16 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 ; CHECK-NOV-NEXT: .cfi_remember_state +; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: lhu s1, 32(a1) ; CHECK-NOV-NEXT: lhu s2, 40(a1) ; CHECK-NOV-NEXT: lhu s3, 48(a1) ; CHECK-NOV-NEXT: lhu s4, 56(a1) ; CHECK-NOV-NEXT: lhu s5, 0(a1) -; CHECK-NOV-NEXT: lhu a2, 8(a1) +; CHECK-NOV-NEXT: lhu a0, 8(a1) ; CHECK-NOV-NEXT: lhu s6, 16(a1) ; CHECK-NOV-NEXT: lhu s7, 24(a1) -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s6 @@ -1662,8 +1662,8 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s5 ; CHECK-NOV-NEXT: fcvt.lu.s s1, fs6, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s5 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-NOV-NEXT: lui a3, 16 @@ -1800,7 +1800,6 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -1808,6 +1807,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -1825,7 +1825,6 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -1833,6 +1832,7 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: 
vsetivli zero, 2, e32, mf2, ta, ma @@ -1857,11 +1857,11 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -1876,11 +1876,11 @@ define <8 x i16> @utesth_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -1972,16 +1972,16 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 ; CHECK-NOV-NEXT: .cfi_remember_state +; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: lhu s1, 32(a1) ; CHECK-NOV-NEXT: lhu s2, 40(a1) -; CHECK-NOV-NEXT: lhu a2, 48(a1) +; CHECK-NOV-NEXT: lhu a0, 48(a1) ; CHECK-NOV-NEXT: lhu s3, 56(a1) ; CHECK-NOV-NEXT: lhu s4, 0(a1) ; CHECK-NOV-NEXT: lhu s5, 8(a1) ; CHECK-NOV-NEXT: lhu s6, 16(a1) ; CHECK-NOV-NEXT: lhu s7, 24(a1) -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 @@ -2002,8 +2002,8 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NOV-NEXT: lui a4, 16 @@ -2164,7 +2164,6 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -2172,6 +2171,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -2189,7 +2189,6 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -2197,6 +2196,7 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli 
zero, 2, e32, mf2, ta, ma @@ -2221,11 +2221,11 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -2240,11 +2240,11 @@ define <8 x i16> @ustest_f16i16(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -3576,10 +3576,10 @@ entry: define <2 x i32> @stest_f64i32_mm(<2 x double> %x) { ; CHECK-NOV-LABEL: stest_f64i32_mm: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz ; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz ; CHECK-NOV-NEXT: lui a2, 524288 ; CHECK-NOV-NEXT: addiw a3, a2, -1 -; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz ; CHECK-NOV-NEXT: bge a1, a3, .LBB27_5 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: bge a0, a3, .LBB27_6 @@ -3620,10 +3620,10 @@ entry: define <2 x i32> @utest_f64i32_mm(<2 x double> %x) { ; CHECK-NOV-LABEL: utest_f64i32_mm: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.lu.d a1, fa1, rtz ; CHECK-NOV-NEXT: fcvt.lu.d a0, fa0, rtz ; CHECK-NOV-NEXT: li a2, -1 ; CHECK-NOV-NEXT: srli a2, a2, 32 -; CHECK-NOV-NEXT: fcvt.lu.d a1, fa1, rtz ; CHECK-NOV-NEXT: bgeu a0, a2, .LBB28_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: bgeu a1, a2, .LBB28_4 @@ -3653,10 +3653,10 @@ entry: define <2 x i32> @ustest_f64i32_mm(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i32_mm: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz ; CHECK-NOV-NEXT: fcvt.l.d a1, fa1, rtz ; CHECK-NOV-NEXT: li a2, -1 ; CHECK-NOV-NEXT: srli a2, a2, 32 -; CHECK-NOV-NEXT: fcvt.l.d a0, fa0, rtz ; CHECK-NOV-NEXT: blt a1, a2, .LBB29_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: mv a1, a2 @@ -3692,14 +3692,14 @@ entry: define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-NOV-LABEL: stest_f32i32_mm: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz +; CHECK-NOV-NEXT: fcvt.l.s a1, fa2, rtz +; CHECK-NOV-NEXT: fcvt.l.s a2, fa3, rtz ; CHECK-NOV-NEXT: lui a3, 524288 ; CHECK-NOV-NEXT: addiw a6, a3, -1 -; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz -; CHECK-NOV-NEXT: bge a1, a6, .LBB30_10 +; CHECK-NOV-NEXT: bge a2, a6, .LBB30_10 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz -; CHECK-NOV-NEXT: bge a2, a6, .LBB30_11 +; CHECK-NOV-NEXT: bge a1, a6, .LBB30_11 ; CHECK-NOV-NEXT: .LBB30_2: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz ; CHECK-NOV-NEXT: bge a4, a6, .LBB30_12 @@ -3710,23 +3710,23 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB30_5: # %entry ; CHECK-NOV-NEXT: bge a3, a4, .LBB30_15 ; CHECK-NOV-NEXT: .LBB30_6: # %entry -; CHECK-NOV-NEXT: bge a3, a2, .LBB30_16 +; CHECK-NOV-NEXT: bge a3, a1, .LBB30_16 ; CHECK-NOV-NEXT: .LBB30_7: # %entry -; CHECK-NOV-NEXT: blt a3, a1, .LBB30_9 +; 
CHECK-NOV-NEXT: blt a3, a2, .LBB30_9 ; CHECK-NOV-NEXT: .LBB30_8: # %entry -; CHECK-NOV-NEXT: lui a1, 524288 +; CHECK-NOV-NEXT: lui a2, 524288 ; CHECK-NOV-NEXT: .LBB30_9: # %entry ; CHECK-NOV-NEXT: sw a5, 0(a0) ; CHECK-NOV-NEXT: sw a4, 4(a0) -; CHECK-NOV-NEXT: sw a2, 8(a0) -; CHECK-NOV-NEXT: sw a1, 12(a0) +; CHECK-NOV-NEXT: sw a1, 8(a0) +; CHECK-NOV-NEXT: sw a2, 12(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB30_10: # %entry -; CHECK-NOV-NEXT: mv a1, a6 +; CHECK-NOV-NEXT: mv a2, a6 ; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz -; CHECK-NOV-NEXT: blt a2, a6, .LBB30_2 +; CHECK-NOV-NEXT: blt a1, a6, .LBB30_2 ; CHECK-NOV-NEXT: .LBB30_11: # %entry -; CHECK-NOV-NEXT: mv a2, a6 +; CHECK-NOV-NEXT: mv a1, a6 ; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz ; CHECK-NOV-NEXT: blt a4, a6, .LBB30_3 ; CHECK-NOV-NEXT: .LBB30_12: # %entry @@ -3740,10 +3740,10 @@ define <4 x i32> @stest_f32i32_mm(<4 x float> %x) { ; CHECK-NOV-NEXT: blt a3, a4, .LBB30_6 ; CHECK-NOV-NEXT: .LBB30_15: # %entry ; CHECK-NOV-NEXT: lui a4, 524288 -; CHECK-NOV-NEXT: blt a3, a2, .LBB30_7 +; CHECK-NOV-NEXT: blt a3, a1, .LBB30_7 ; CHECK-NOV-NEXT: .LBB30_16: # %entry -; CHECK-NOV-NEXT: lui a2, 524288 -; CHECK-NOV-NEXT: bge a3, a1, .LBB30_8 +; CHECK-NOV-NEXT: lui a1, 524288 +; CHECK-NOV-NEXT: bge a3, a2, .LBB30_8 ; CHECK-NOV-NEXT: j .LBB30_9 ; ; CHECK-V-LABEL: stest_f32i32_mm: @@ -3763,14 +3763,14 @@ entry: define <4 x i32> @utest_f32i32_mm(<4 x float> %x) { ; CHECK-NOV-LABEL: utest_f32i32_mm: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.lu.s a1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.lu.s a1, fa1, rtz +; CHECK-NOV-NEXT: fcvt.lu.s a2, fa0, rtz ; CHECK-NOV-NEXT: li a3, -1 ; CHECK-NOV-NEXT: srli a3, a3, 32 -; CHECK-NOV-NEXT: fcvt.lu.s a2, fa1, rtz -; CHECK-NOV-NEXT: bgeu a1, a3, .LBB31_6 +; CHECK-NOV-NEXT: bgeu a2, a3, .LBB31_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.lu.s a4, fa2, rtz -; CHECK-NOV-NEXT: bgeu a2, a3, .LBB31_7 +; CHECK-NOV-NEXT: bgeu a1, a3, .LBB31_7 ; CHECK-NOV-NEXT: .LBB31_2: # %entry ; CHECK-NOV-NEXT: fcvt.lu.s a5, fa3, rtz ; CHECK-NOV-NEXT: bgeu a4, a3, .LBB31_8 @@ -3779,17 +3779,17 @@ define <4 x i32> @utest_f32i32_mm(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB31_4: # %entry ; CHECK-NOV-NEXT: mv a5, a3 ; CHECK-NOV-NEXT: .LBB31_5: # %entry -; CHECK-NOV-NEXT: sw a1, 0(a0) -; CHECK-NOV-NEXT: sw a2, 4(a0) +; CHECK-NOV-NEXT: sw a2, 0(a0) +; CHECK-NOV-NEXT: sw a1, 4(a0) ; CHECK-NOV-NEXT: sw a4, 8(a0) ; CHECK-NOV-NEXT: sw a5, 12(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB31_6: # %entry -; CHECK-NOV-NEXT: mv a1, a3 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: fcvt.lu.s a4, fa2, rtz -; CHECK-NOV-NEXT: bltu a2, a3, .LBB31_2 +; CHECK-NOV-NEXT: bltu a1, a3, .LBB31_2 ; CHECK-NOV-NEXT: .LBB31_7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv a1, a3 ; CHECK-NOV-NEXT: fcvt.lu.s a5, fa3, rtz ; CHECK-NOV-NEXT: bltu a4, a3, .LBB31_3 ; CHECK-NOV-NEXT: .LBB31_8: # %entry @@ -3813,50 +3813,50 @@ entry: define <4 x i32> @ustest_f32i32_mm(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i32_mm: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz -; CHECK-NOV-NEXT: li a3, -1 -; CHECK-NOV-NEXT: srli a3, a3, 32 ; CHECK-NOV-NEXT: fcvt.l.s a2, fa2, rtz -; CHECK-NOV-NEXT: bge a1, a3, .LBB32_6 +; CHECK-NOV-NEXT: fcvt.l.s a1, fa3, rtz +; CHECK-NOV-NEXT: li a4, -1 +; CHECK-NOV-NEXT: srli a4, a4, 32 +; CHECK-NOV-NEXT: bge a1, a4, .LBB32_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz -; CHECK-NOV-NEXT: bge a2, a3, .LBB32_7 +; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz +; 
CHECK-NOV-NEXT: bge a2, a4, .LBB32_7 ; CHECK-NOV-NEXT: .LBB32_2: # %entry ; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz -; CHECK-NOV-NEXT: bge a4, a3, .LBB32_8 +; CHECK-NOV-NEXT: bge a3, a4, .LBB32_8 ; CHECK-NOV-NEXT: .LBB32_3: # %entry -; CHECK-NOV-NEXT: blt a5, a3, .LBB32_5 +; CHECK-NOV-NEXT: blt a5, a4, .LBB32_5 ; CHECK-NOV-NEXT: .LBB32_4: # %entry -; CHECK-NOV-NEXT: mv a5, a3 +; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB32_5: # %entry -; CHECK-NOV-NEXT: sgtz a3, a5 -; CHECK-NOV-NEXT: negw a3, a3 -; CHECK-NOV-NEXT: and a3, a3, a5 -; CHECK-NOV-NEXT: sgtz a5, a4 +; CHECK-NOV-NEXT: sgtz a4, a5 +; CHECK-NOV-NEXT: negw a4, a4 +; CHECK-NOV-NEXT: and a4, a4, a5 +; CHECK-NOV-NEXT: sgtz a5, a3 ; CHECK-NOV-NEXT: negw a5, a5 -; CHECK-NOV-NEXT: and a4, a5, a4 +; CHECK-NOV-NEXT: and a3, a5, a3 ; CHECK-NOV-NEXT: sgtz a5, a2 ; CHECK-NOV-NEXT: negw a5, a5 ; CHECK-NOV-NEXT: and a2, a5, a2 ; CHECK-NOV-NEXT: sgtz a5, a1 ; CHECK-NOV-NEXT: negw a5, a5 ; CHECK-NOV-NEXT: and a1, a5, a1 -; CHECK-NOV-NEXT: sw a3, 0(a0) -; CHECK-NOV-NEXT: sw a4, 4(a0) +; CHECK-NOV-NEXT: sw a4, 0(a0) +; CHECK-NOV-NEXT: sw a3, 4(a0) ; CHECK-NOV-NEXT: sw a2, 8(a0) ; CHECK-NOV-NEXT: sw a1, 12(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB32_6: # %entry -; CHECK-NOV-NEXT: mv a1, a3 -; CHECK-NOV-NEXT: fcvt.l.s a4, fa1, rtz -; CHECK-NOV-NEXT: blt a2, a3, .LBB32_2 +; CHECK-NOV-NEXT: mv a1, a4 +; CHECK-NOV-NEXT: fcvt.l.s a3, fa1, rtz +; CHECK-NOV-NEXT: blt a2, a4, .LBB32_2 ; CHECK-NOV-NEXT: .LBB32_7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv a2, a4 ; CHECK-NOV-NEXT: fcvt.l.s a5, fa0, rtz -; CHECK-NOV-NEXT: blt a4, a3, .LBB32_3 +; CHECK-NOV-NEXT: blt a3, a4, .LBB32_3 ; CHECK-NOV-NEXT: .LBB32_8: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bge a5, a3, .LBB32_4 +; CHECK-NOV-NEXT: mv a3, a4 +; CHECK-NOV-NEXT: bge a5, a4, .LBB32_4 ; CHECK-NOV-NEXT: j .LBB32_5 ; ; CHECK-V-LABEL: ustest_f32i32_mm: @@ -3898,12 +3898,12 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 ; CHECK-NOV-NEXT: .cfi_remember_state +; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: lhu s1, 0(a1) ; CHECK-NOV-NEXT: lhu s2, 8(a1) -; CHECK-NOV-NEXT: lhu a2, 16(a1) +; CHECK-NOV-NEXT: lhu a0, 16(a1) ; CHECK-NOV-NEXT: lhu s3, 24(a1) -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 @@ -3912,8 +3912,8 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NOV-NEXT: lui a1, 524288 @@ -4011,11 +4011,11 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -4030,11 +4030,11 @@ define <4 x i32> @stest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; 
CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -4096,22 +4096,22 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 ; CHECK-NOV-NEXT: .cfi_remember_state -; CHECK-NOV-NEXT: lhu s1, 0(a1) -; CHECK-NOV-NEXT: lhu a2, 8(a1) -; CHECK-NOV-NEXT: lhu s2, 16(a1) -; CHECK-NOV-NEXT: lhu s3, 24(a1) ; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: lhu s2, 0(a1) +; CHECK-NOV-NEXT: lhu a0, 8(a1) +; CHECK-NOV-NEXT: lhu s1, 16(a1) +; CHECK-NOV-NEXT: lhu s3, 24(a1) +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s2 +; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs1, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: fcvt.lu.s s1, fs2, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s2 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-NOV-NEXT: li a1, -1 @@ -4189,11 +4189,11 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -4208,11 +4208,11 @@ define <4 x i32> @utesth_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -4273,12 +4273,12 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs1, -56 ; CHECK-NOV-NEXT: .cfi_offset fs2, -64 ; CHECK-NOV-NEXT: .cfi_remember_state +; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: lhu s1, 0(a1) ; CHECK-NOV-NEXT: lhu s2, 8(a1) -; CHECK-NOV-NEXT: lhu a2, 16(a1) +; CHECK-NOV-NEXT: lhu a0, 16(a1) ; CHECK-NOV-NEXT: lhu s3, 24(a1) -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs2, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 @@ -4287,8 +4287,8 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s1 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs2, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s 
a0, fa0, rtz ; CHECK-NOV-NEXT: li a2, -1 @@ -4378,11 +4378,11 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -4397,11 +4397,11 @@ define <4 x i32> @ustest_f16i32_mm(<4 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e64, m1, ta, ma @@ -4447,10 +4447,10 @@ entry: define <2 x i16> @stest_f64i16_mm(<2 x double> %x) { ; CHECK-NOV-LABEL: stest_f64i16_mm: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz ; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz ; CHECK-NOV-NEXT: lui a2, 8 ; CHECK-NOV-NEXT: addiw a2, a2, -1 -; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz ; CHECK-NOV-NEXT: bge a1, a2, .LBB36_5 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: bge a0, a2, .LBB36_6 @@ -4493,10 +4493,10 @@ entry: define <2 x i16> @utest_f64i16_mm(<2 x double> %x) { ; CHECK-NOV-LABEL: utest_f64i16_mm: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.wu.d a1, fa1, rtz ; CHECK-NOV-NEXT: fcvt.wu.d a0, fa0, rtz ; CHECK-NOV-NEXT: lui a2, 16 ; CHECK-NOV-NEXT: addiw a2, a2, -1 -; CHECK-NOV-NEXT: fcvt.wu.d a1, fa1, rtz ; CHECK-NOV-NEXT: bgeu a0, a2, .LBB37_3 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: bgeu a1, a2, .LBB37_4 @@ -4526,10 +4526,10 @@ entry: define <2 x i16> @ustest_f64i16_mm(<2 x double> %x) { ; CHECK-NOV-LABEL: ustest_f64i16_mm: ; CHECK-NOV: # %bb.0: # %entry +; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz ; CHECK-NOV-NEXT: fcvt.w.d a1, fa1, rtz ; CHECK-NOV-NEXT: lui a2, 16 ; CHECK-NOV-NEXT: addiw a2, a2, -1 -; CHECK-NOV-NEXT: fcvt.w.d a0, fa0, rtz ; CHECK-NOV-NEXT: blt a1, a2, .LBB38_2 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: mv a1, a2 @@ -4565,14 +4565,14 @@ entry: define <4 x i16> @stest_f32i16_mm(<4 x float> %x) { ; CHECK-NOV-LABEL: stest_f32i16_mm: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz +; CHECK-NOV-NEXT: fcvt.w.s a1, fa2, rtz +; CHECK-NOV-NEXT: fcvt.w.s a2, fa3, rtz ; CHECK-NOV-NEXT: lui a5, 8 ; CHECK-NOV-NEXT: addiw a5, a5, -1 -; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz -; CHECK-NOV-NEXT: bge a1, a5, .LBB39_10 +; CHECK-NOV-NEXT: bge a2, a5, .LBB39_10 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz -; CHECK-NOV-NEXT: bge a2, a5, .LBB39_11 +; CHECK-NOV-NEXT: bge a1, a5, .LBB39_11 ; CHECK-NOV-NEXT: .LBB39_2: # %entry ; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz ; CHECK-NOV-NEXT: bge a3, a5, .LBB39_12 @@ -4584,23 +4584,23 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB39_5: # %entry ; CHECK-NOV-NEXT: bge a5, a3, .LBB39_15 ; CHECK-NOV-NEXT: .LBB39_6: # %entry -; CHECK-NOV-NEXT: bge a5, a2, .LBB39_16 +; CHECK-NOV-NEXT: bge a5, a1, .LBB39_16 ; CHECK-NOV-NEXT: .LBB39_7: # %entry -; CHECK-NOV-NEXT: blt a5, a1, 
.LBB39_9 +; CHECK-NOV-NEXT: blt a5, a2, .LBB39_9 ; CHECK-NOV-NEXT: .LBB39_8: # %entry -; CHECK-NOV-NEXT: lui a1, 1048568 +; CHECK-NOV-NEXT: lui a2, 1048568 ; CHECK-NOV-NEXT: .LBB39_9: # %entry ; CHECK-NOV-NEXT: sh a4, 0(a0) ; CHECK-NOV-NEXT: sh a3, 2(a0) -; CHECK-NOV-NEXT: sh a2, 4(a0) -; CHECK-NOV-NEXT: sh a1, 6(a0) +; CHECK-NOV-NEXT: sh a1, 4(a0) +; CHECK-NOV-NEXT: sh a2, 6(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB39_10: # %entry -; CHECK-NOV-NEXT: mv a1, a5 +; CHECK-NOV-NEXT: mv a2, a5 ; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz -; CHECK-NOV-NEXT: blt a2, a5, .LBB39_2 +; CHECK-NOV-NEXT: blt a1, a5, .LBB39_2 ; CHECK-NOV-NEXT: .LBB39_11: # %entry -; CHECK-NOV-NEXT: mv a2, a5 +; CHECK-NOV-NEXT: mv a1, a5 ; CHECK-NOV-NEXT: fcvt.w.s a4, fa0, rtz ; CHECK-NOV-NEXT: blt a3, a5, .LBB39_3 ; CHECK-NOV-NEXT: .LBB39_12: # %entry @@ -4615,10 +4615,10 @@ define <4 x i16> @stest_f32i16_mm(<4 x float> %x) { ; CHECK-NOV-NEXT: blt a5, a3, .LBB39_6 ; CHECK-NOV-NEXT: .LBB39_15: # %entry ; CHECK-NOV-NEXT: lui a3, 1048568 -; CHECK-NOV-NEXT: blt a5, a2, .LBB39_7 +; CHECK-NOV-NEXT: blt a5, a1, .LBB39_7 ; CHECK-NOV-NEXT: .LBB39_16: # %entry -; CHECK-NOV-NEXT: lui a2, 1048568 -; CHECK-NOV-NEXT: bge a5, a1, .LBB39_8 +; CHECK-NOV-NEXT: lui a1, 1048568 +; CHECK-NOV-NEXT: bge a5, a2, .LBB39_8 ; CHECK-NOV-NEXT: j .LBB39_9 ; ; CHECK-V-LABEL: stest_f32i16_mm: @@ -4639,14 +4639,14 @@ entry: define <4 x i16> @utest_f32i16_mm(<4 x float> %x) { ; CHECK-NOV-LABEL: utest_f32i16_mm: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.wu.s a1, fa0, rtz +; CHECK-NOV-NEXT: fcvt.wu.s a1, fa1, rtz +; CHECK-NOV-NEXT: fcvt.wu.s a2, fa0, rtz ; CHECK-NOV-NEXT: lui a3, 16 ; CHECK-NOV-NEXT: addiw a3, a3, -1 -; CHECK-NOV-NEXT: fcvt.wu.s a2, fa1, rtz -; CHECK-NOV-NEXT: bgeu a1, a3, .LBB40_6 +; CHECK-NOV-NEXT: bgeu a2, a3, .LBB40_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry ; CHECK-NOV-NEXT: fcvt.wu.s a4, fa2, rtz -; CHECK-NOV-NEXT: bgeu a2, a3, .LBB40_7 +; CHECK-NOV-NEXT: bgeu a1, a3, .LBB40_7 ; CHECK-NOV-NEXT: .LBB40_2: # %entry ; CHECK-NOV-NEXT: fcvt.wu.s a5, fa3, rtz ; CHECK-NOV-NEXT: bgeu a4, a3, .LBB40_8 @@ -4655,17 +4655,17 @@ define <4 x i16> @utest_f32i16_mm(<4 x float> %x) { ; CHECK-NOV-NEXT: .LBB40_4: # %entry ; CHECK-NOV-NEXT: mv a5, a3 ; CHECK-NOV-NEXT: .LBB40_5: # %entry -; CHECK-NOV-NEXT: sh a1, 0(a0) -; CHECK-NOV-NEXT: sh a2, 2(a0) +; CHECK-NOV-NEXT: sh a2, 0(a0) +; CHECK-NOV-NEXT: sh a1, 2(a0) ; CHECK-NOV-NEXT: sh a4, 4(a0) ; CHECK-NOV-NEXT: sh a5, 6(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB40_6: # %entry -; CHECK-NOV-NEXT: mv a1, a3 +; CHECK-NOV-NEXT: mv a2, a3 ; CHECK-NOV-NEXT: fcvt.wu.s a4, fa2, rtz -; CHECK-NOV-NEXT: bltu a2, a3, .LBB40_2 +; CHECK-NOV-NEXT: bltu a1, a3, .LBB40_2 ; CHECK-NOV-NEXT: .LBB40_7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv a1, a3 ; CHECK-NOV-NEXT: fcvt.wu.s a5, fa3, rtz ; CHECK-NOV-NEXT: bltu a4, a3, .LBB40_3 ; CHECK-NOV-NEXT: .LBB40_8: # %entry @@ -4690,50 +4690,50 @@ entry: define <4 x i16> @ustest_f32i16_mm(<4 x float> %x) { ; CHECK-NOV-LABEL: ustest_f32i16_mm: ; CHECK-NOV: # %bb.0: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz -; CHECK-NOV-NEXT: lui a3, 16 -; CHECK-NOV-NEXT: addiw a3, a3, -1 ; CHECK-NOV-NEXT: fcvt.w.s a2, fa2, rtz -; CHECK-NOV-NEXT: bge a1, a3, .LBB41_6 +; CHECK-NOV-NEXT: fcvt.w.s a1, fa3, rtz +; CHECK-NOV-NEXT: lui a4, 16 +; CHECK-NOV-NEXT: addiw a4, a4, -1 +; CHECK-NOV-NEXT: bge a1, a4, .LBB41_6 ; CHECK-NOV-NEXT: # %bb.1: # %entry -; CHECK-NOV-NEXT: fcvt.w.s a4, fa1, rtz -; CHECK-NOV-NEXT: bge a2, a3, .LBB41_7 +; CHECK-NOV-NEXT: fcvt.w.s a3, 
fa1, rtz +; CHECK-NOV-NEXT: bge a2, a4, .LBB41_7 ; CHECK-NOV-NEXT: .LBB41_2: # %entry ; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz -; CHECK-NOV-NEXT: bge a4, a3, .LBB41_8 +; CHECK-NOV-NEXT: bge a3, a4, .LBB41_8 ; CHECK-NOV-NEXT: .LBB41_3: # %entry -; CHECK-NOV-NEXT: blt a5, a3, .LBB41_5 +; CHECK-NOV-NEXT: blt a5, a4, .LBB41_5 ; CHECK-NOV-NEXT: .LBB41_4: # %entry -; CHECK-NOV-NEXT: mv a5, a3 +; CHECK-NOV-NEXT: mv a5, a4 ; CHECK-NOV-NEXT: .LBB41_5: # %entry -; CHECK-NOV-NEXT: sgtz a3, a5 -; CHECK-NOV-NEXT: negw a3, a3 -; CHECK-NOV-NEXT: and a3, a3, a5 -; CHECK-NOV-NEXT: sgtz a5, a4 +; CHECK-NOV-NEXT: sgtz a4, a5 +; CHECK-NOV-NEXT: negw a4, a4 +; CHECK-NOV-NEXT: and a4, a4, a5 +; CHECK-NOV-NEXT: sgtz a5, a3 ; CHECK-NOV-NEXT: negw a5, a5 -; CHECK-NOV-NEXT: and a4, a5, a4 +; CHECK-NOV-NEXT: and a3, a5, a3 ; CHECK-NOV-NEXT: sgtz a5, a2 ; CHECK-NOV-NEXT: negw a5, a5 ; CHECK-NOV-NEXT: and a2, a5, a2 ; CHECK-NOV-NEXT: sgtz a5, a1 ; CHECK-NOV-NEXT: negw a5, a5 ; CHECK-NOV-NEXT: and a1, a5, a1 -; CHECK-NOV-NEXT: sh a3, 0(a0) -; CHECK-NOV-NEXT: sh a4, 2(a0) +; CHECK-NOV-NEXT: sh a4, 0(a0) +; CHECK-NOV-NEXT: sh a3, 2(a0) ; CHECK-NOV-NEXT: sh a2, 4(a0) ; CHECK-NOV-NEXT: sh a1, 6(a0) ; CHECK-NOV-NEXT: ret ; CHECK-NOV-NEXT: .LBB41_6: # %entry -; CHECK-NOV-NEXT: mv a1, a3 -; CHECK-NOV-NEXT: fcvt.w.s a4, fa1, rtz -; CHECK-NOV-NEXT: blt a2, a3, .LBB41_2 +; CHECK-NOV-NEXT: mv a1, a4 +; CHECK-NOV-NEXT: fcvt.w.s a3, fa1, rtz +; CHECK-NOV-NEXT: blt a2, a4, .LBB41_2 ; CHECK-NOV-NEXT: .LBB41_7: # %entry -; CHECK-NOV-NEXT: mv a2, a3 +; CHECK-NOV-NEXT: mv a2, a4 ; CHECK-NOV-NEXT: fcvt.w.s a5, fa0, rtz -; CHECK-NOV-NEXT: blt a4, a3, .LBB41_3 +; CHECK-NOV-NEXT: blt a3, a4, .LBB41_3 ; CHECK-NOV-NEXT: .LBB41_8: # %entry -; CHECK-NOV-NEXT: mv a4, a3 -; CHECK-NOV-NEXT: bge a5, a3, .LBB41_4 +; CHECK-NOV-NEXT: mv a3, a4 +; CHECK-NOV-NEXT: bge a5, a4, .LBB41_4 ; CHECK-NOV-NEXT: j .LBB41_5 ; ; CHECK-V-LABEL: ustest_f32i16_mm: @@ -4790,16 +4790,16 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 ; CHECK-NOV-NEXT: .cfi_remember_state +; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: lhu s1, 32(a1) ; CHECK-NOV-NEXT: lhu s2, 40(a1) -; CHECK-NOV-NEXT: lhu a2, 48(a1) +; CHECK-NOV-NEXT: lhu a0, 48(a1) ; CHECK-NOV-NEXT: lhu s3, 56(a1) ; CHECK-NOV-NEXT: lhu s4, 0(a1) ; CHECK-NOV-NEXT: lhu s5, 8(a1) ; CHECK-NOV-NEXT: lhu s6, 16(a1) ; CHECK-NOV-NEXT: lhu s7, 24(a1) -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 @@ -4820,8 +4820,8 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NOV-NEXT: lui a7, 8 @@ -5000,7 +5000,6 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -5008,6 +5007,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; 
CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -5025,7 +5025,6 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -5033,6 +5032,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -5057,11 +5057,11 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -5076,11 +5076,11 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -5172,16 +5172,16 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 ; CHECK-NOV-NEXT: .cfi_remember_state +; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: lhu s1, 32(a1) ; CHECK-NOV-NEXT: lhu s2, 40(a1) ; CHECK-NOV-NEXT: lhu s3, 48(a1) ; CHECK-NOV-NEXT: lhu s4, 56(a1) ; CHECK-NOV-NEXT: lhu s5, 0(a1) -; CHECK-NOV-NEXT: lhu a2, 8(a1) +; CHECK-NOV-NEXT: lhu a0, 8(a1) ; CHECK-NOV-NEXT: lhu s6, 16(a1) ; CHECK-NOV-NEXT: lhu s7, 24(a1) -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s6 @@ -5202,8 +5202,8 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s5 ; CHECK-NOV-NEXT: fcvt.lu.s s1, fs6, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s5 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-NOV-NEXT: lui a3, 16 @@ -5340,7 +5340,6 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -5348,6 +5347,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded 
Spill +; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -5365,7 +5365,6 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -5373,6 +5372,7 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -5397,11 +5397,11 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -5416,11 +5416,11 @@ define <8 x i16> @utesth_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.lu.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -5511,16 +5511,16 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: .cfi_offset fs5, -120 ; CHECK-NOV-NEXT: .cfi_offset fs6, -128 ; CHECK-NOV-NEXT: .cfi_remember_state +; CHECK-NOV-NEXT: mv s0, a0 ; CHECK-NOV-NEXT: lhu s1, 32(a1) ; CHECK-NOV-NEXT: lhu s2, 40(a1) -; CHECK-NOV-NEXT: lhu a2, 48(a1) +; CHECK-NOV-NEXT: lhu a0, 48(a1) ; CHECK-NOV-NEXT: lhu s3, 56(a1) ; CHECK-NOV-NEXT: lhu s4, 0(a1) ; CHECK-NOV-NEXT: lhu s5, 8(a1) ; CHECK-NOV-NEXT: lhu s6, 16(a1) ; CHECK-NOV-NEXT: lhu s7, 24(a1) -; CHECK-NOV-NEXT: mv s0, a0 -; CHECK-NOV-NEXT: fmv.w.x fa0, a2 +; CHECK-NOV-NEXT: fmv.w.x fa0, a0 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs6, fa0 ; CHECK-NOV-NEXT: fmv.w.x fa0, s2 @@ -5541,8 +5541,8 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-NOV-NEXT: fmv.w.x fa0, s4 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fmv.s fs0, fa0 -; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: fcvt.l.s s1, fs6, rtz +; CHECK-NOV-NEXT: fmv.w.x fa0, s3 ; CHECK-NOV-NEXT: call __extendhfsf2 ; CHECK-NOV-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-NOV-NEXT: lui a3, 16 @@ -5703,7 +5703,6 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, a0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -5711,6 +5710,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # 
Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s6 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -5728,7 +5728,6 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s5 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: csrr a0, vlenb @@ -5736,6 +5735,7 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: add a0, sp, a0 ; CHECK-V-NEXT: addi a0, a0, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s4 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -5760,11 +5760,11 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s3 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s2 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma @@ -5779,11 +5779,11 @@ define <8 x i16> @ustest_f16i16_mm(<8 x half> %x) { ; CHECK-V-NEXT: fmv.w.x fa0, s1 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz -; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-V-NEXT: vmv.s.x v8, a0 ; CHECK-V-NEXT: addi a0, sp, 16 ; CHECK-V-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill +; CHECK-V-NEXT: fmv.w.x fa0, s0 ; CHECK-V-NEXT: call __extendhfsf2 ; CHECK-V-NEXT: fcvt.l.s a0, fa0, rtz ; CHECK-V-NEXT: vsetivli zero, 2, e32, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll index 195ffc50594c3..9e8cd85739183 100644 --- a/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/frm-insert.ll @@ -466,8 +466,8 @@ define @test5( %0, ; CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: srl a0, a0, a2 ; CHECK-NEXT: andi a0, a0, 7 -; CHECK-NEXT: vfadd.vv v8, v8, v8 ; CHECK-NEXT: sw a0, 0(a1) +; CHECK-NEXT: vfadd.vv v8, v8, v8 ; CHECK-NEXT: ret ; ; UNOPT-LABEL: test5: @@ -482,8 +482,8 @@ define @test5( %0, ; UNOPT-NEXT: slli a2, a2, 2 ; UNOPT-NEXT: srl a0, a0, a2 ; UNOPT-NEXT: andi a0, a0, 7 -; UNOPT-NEXT: vfadd.vv v8, v8, v8 ; UNOPT-NEXT: sw a0, 0(a1) +; UNOPT-NEXT: vfadd.vv v8, v8, v8 ; UNOPT-NEXT: ret entry: %a = call @llvm.riscv.vfadd.nxv1f32.nxv1f32( diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll index 3d992aa13e379..15ba3850de23d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-constrained-sdnode.ll @@ -12,11 +12,11 @@ define @round_nxv1f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; 
CHECK-NEXT: fsrm a0 @@ -35,11 +35,11 @@ define @round_nxv2f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -58,11 +58,11 @@ define @round_nxv4f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -81,11 +81,11 @@ define @round_nxv8f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -104,11 +104,11 @@ define @round_nxv16f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -127,11 +127,11 @@ define @round_nxv32f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -152,9 +152,9 @@ define @round_nxv1f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -175,9 +175,9 @@ define @round_nxv2f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -198,9 +198,9 @@ define @round_nxv4f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; 
CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -221,9 +221,9 @@ define @round_nxv8f32( %x) strictfp { ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -244,9 +244,9 @@ define @round_nxv16f32( %x) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -265,11 +265,11 @@ define @round_nxv1f64( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -288,11 +288,11 @@ define @round_nxv2f64( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -311,11 +311,11 @@ define @round_nxv4f64( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -334,11 +334,11 @@ define @round_nxv8f64( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll index f7422b279149f..323a22a89bf7b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fround-sdnode.ll @@ -20,11 +20,11 @@ define @round_nxv1bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, 
e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -43,11 +43,11 @@ define @round_nxv2bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -66,11 +66,11 @@ define @round_nxv4bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -89,11 +89,11 @@ define @round_nxv8bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -112,11 +112,11 @@ define @round_nxv16bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -135,11 +135,11 @@ define @round_nxv32bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -169,12 +169,12 @@ define @round_nxv32bf16( %x) { define @round_nxv1f16( %x) { ; ZVFH-LABEL: round_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -187,11 +187,11 @@ define @round_nxv1f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, 
mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -208,12 +208,12 @@ declare @llvm.round.nxv1f16() define @round_nxv2f16( %x) { ; ZVFH-LABEL: round_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -226,11 +226,11 @@ define @round_nxv2f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -247,12 +247,12 @@ declare @llvm.round.nxv2f16() define @round_nxv4f16( %x) { ; ZVFH-LABEL: round_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -265,11 +265,11 @@ define @round_nxv4f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -286,12 +286,12 @@ declare @llvm.round.nxv4f16() define @round_nxv8f16( %x) { ; ZVFH-LABEL: round_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -304,11 +304,11 @@ define @round_nxv8f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: 
vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -325,12 +325,12 @@ declare @llvm.round.nxv8f16() define @round_nxv16f16( %x) { ; ZVFH-LABEL: round_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -343,11 +343,11 @@ define @round_nxv16f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -364,12 +364,12 @@ declare @llvm.round.nxv16f16() define @round_nxv32f16( %x) { ; ZVFH-LABEL: round_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -382,11 +382,11 @@ define @round_nxv32f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -421,8 +421,8 @@ define @round_nxv1f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -441,8 +441,8 @@ define @round_nxv2f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -461,8 +461,8 @@ define @round_nxv4f32( %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -481,8 +481,8 @@ define @round_nxv8f32( %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; 
CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -501,8 +501,8 @@ define @round_nxv16f32( %x) { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -517,12 +517,12 @@ declare @llvm.round.nxv16f32() define @round_nxv1f64( %x) { ; CHECK-LABEL: round_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -537,12 +537,12 @@ declare @llvm.round.nxv1f64() define @round_nxv2f64( %x) { ; CHECK-LABEL: round_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -557,12 +557,12 @@ declare @llvm.round.nxv2f64() define @round_nxv4f64( %x) { ; CHECK-LABEL: round_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -577,12 +577,12 @@ declare @llvm.round.nxv4f64() define @round_nxv8f64( %x) { ; CHECK-LABEL: round_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll index c293ac91b63bf..6cd6eef99a9ec 100644 --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-constrained-sdnode.ll @@ -12,11 +12,11 @@ define @roundeven_nxv1f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI0_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI0_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -35,11 +35,11 @@ 
define @roundeven_nxv2f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI1_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI1_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -58,11 +58,11 @@ define @roundeven_nxv4f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI2_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI2_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -81,11 +81,11 @@ define @roundeven_nxv8f16( %x) strictfp { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI3_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI3_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -104,11 +104,11 @@ define @roundeven_nxv16f16( %x) strictf ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI4_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -127,11 +127,11 @@ define @roundeven_nxv32f16( %x) strictf ; CHECK-NEXT: vsetvli a0, zero, e16, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: flh fa5, %lo(.LCPI5_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -152,9 +152,9 @@ define @roundeven_nxv1f32( %x) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -175,9 +175,9 @@ define @roundeven_nxv2f32( %x) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -198,9 +198,9 @@ define @roundeven_nxv4f32( %x) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, 
v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -221,9 +221,9 @@ define @roundeven_nxv8f32( %x) strictfp ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -244,9 +244,9 @@ define @roundeven_nxv16f32( %x) stric ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -265,11 +265,11 @@ define @roundeven_nxv1f64( %x) strict ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI11_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI11_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: vmflt.vf v0, v9, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -288,11 +288,11 @@ define @roundeven_nxv2f64( %x) strict ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI12_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI12_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: vmflt.vf v0, v10, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -311,11 +311,11 @@ define @roundeven_nxv4f64( %x) strict ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI13_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI13_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: vmflt.vf v0, v12, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -334,11 +334,11 @@ define @roundeven_nxv8f64( %x) strict ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, mu ; CHECK-NEXT: vmfne.vv v0, v8, v8 ; CHECK-NEXT: lui a0, %hi(.LCPI14_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) ; CHECK-NEXT: vfadd.vv v8, v8, v8, v0.t +; CHECK-NEXT: fld fa5, %lo(.LCPI14_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: vmflt.vf v0, v16, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll index 865531b77eb29..903345dca1af2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/froundeven-sdnode.ll @@ -19,11 +19,11 @@ define @roundeven_nxv1bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, 
mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -42,11 +42,11 @@ define @roundeven_nxv2bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -65,11 +65,11 @@ define @roundeven_nxv4bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -88,11 +88,11 @@ define @roundeven_nxv8bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -111,11 +111,11 @@ define @roundeven_nxv16bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v8, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -134,11 +134,11 @@ define @roundeven_nxv32bf16( %x) { ; CHECK-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -168,12 +168,12 @@ define @roundeven_nxv32bf16( %x) { define @roundeven_nxv1f16( %x) { ; ZVFH-LABEL: roundeven_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI6_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI6_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -186,11 +186,11 @@ define @roundeven_nxv1f16( %x) { ; 
ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -207,12 +207,12 @@ declare @llvm.roundeven.nxv1f16() define @roundeven_nxv2f16( %x) { ; ZVFH-LABEL: roundeven_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI7_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI7_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -225,11 +225,11 @@ define @roundeven_nxv2f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -246,12 +246,12 @@ declare @llvm.roundeven.nxv2f16() define @roundeven_nxv4f16( %x) { ; ZVFH-LABEL: roundeven_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI8_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI8_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -264,11 +264,11 @@ define @roundeven_nxv4f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -285,12 +285,12 @@ declare @llvm.roundeven.nxv4f16() define @roundeven_nxv8f16( %x) { ; ZVFH-LABEL: roundeven_nxv8f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI9_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI9_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -303,11 +303,11 @@ define @roundeven_nxv8f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 
; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -324,12 +324,12 @@ declare @llvm.roundeven.nxv8f16() define @roundeven_nxv16f16( %x) { ; ZVFH-LABEL: roundeven_nxv16f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI10_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI10_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -342,11 +342,11 @@ define @roundeven_nxv16f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -363,12 +363,12 @@ declare @llvm.roundeven.nxv16f16() define @roundeven_nxv32f16( %x) { ; ZVFH-LABEL: roundeven_nxv32f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: vsetvli a0, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI11_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI11_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -381,11 +381,11 @@ define @roundeven_nxv32f16( %x) { ; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -420,8 +420,8 @@ define @roundeven_nxv1f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -440,8 +440,8 @@ define @roundeven_nxv2f32( %x) { ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -460,8 +460,8 @@ define @roundeven_nxv4f32( %x) { ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -480,8 +480,8 @@ define @roundeven_nxv8f32( %x) { ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: 
vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -500,8 +500,8 @@ define @roundeven_nxv16f32( %x) { ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -516,12 +516,12 @@ declare @llvm.roundeven.nxv16f32() define @roundeven_nxv1f64( %x) { ; CHECK-LABEL: roundeven_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI17_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI17_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI17_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -536,12 +536,12 @@ declare @llvm.roundeven.nxv1f64() define @roundeven_nxv2f64( %x) { ; CHECK-LABEL: roundeven_nxv2f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI18_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI18_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI18_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -556,12 +556,12 @@ declare @llvm.roundeven.nxv2f64() define @roundeven_nxv4f64( %x) { ; CHECK-LABEL: roundeven_nxv4f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI19_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI19_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI19_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -576,12 +576,12 @@ declare @llvm.roundeven.nxv4f64() define @roundeven_nxv8f64( %x) { ; CHECK-LABEL: roundeven_nxv8f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI20_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI20_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI20_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll index b569efc7447da..f52200b4e7c34 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll @@ -984,10 +984,10 @@ define @fshr_v16i64( %a, @fshr_v16i64( %a, @fshr_v16i64( %a, @fshr_v16i64( %a, @fshr_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @fshl_v16i64( %a, @ceil_nxv1f16_to_ui32( %x) { define @ceil_nxv1f16_to_si64( %x) { ; CHECK-LABEL: ceil_nxv1f16_to_si64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI22_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; CHECK-NEXT: 
vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI22_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI22_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -388,12 +388,12 @@ define @ceil_nxv1f16_to_si64( %x) { define @ceil_nxv1f16_to_ui64( %x) { ; CHECK-LABEL: ceil_nxv1f16_to_ui64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI23_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI23_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI23_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI23_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -533,12 +533,12 @@ define @ceil_nxv4f16_to_ui32( %x) { define @ceil_nxv4f16_to_si64( %x) { ; CHECK-LABEL: ceil_nxv4f16_to_si64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI30_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI30_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI30_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI30_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -590,12 +590,12 @@ define @ceil_nxv4f16_to_si64( %x) { define @ceil_nxv4f16_to_ui64( %x) { ; CHECK-LABEL: ceil_nxv4f16_to_ui64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI31_0) -; CHECK-NEXT: flh fa5, %lo(.LCPI31_0)(a0) ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI31_0) +; CHECK-NEXT: flh fa5, %lo(.LCPI31_0)(a0) ; CHECK-NEXT: fsrmi a0, 3 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/localvar.ll b/llvm/test/CodeGen/RISCV/rvv/localvar.ll index ad8fde013ce08..fb7cd0072efa9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/localvar.ll +++ b/llvm/test/CodeGen/RISCV/rvv/localvar.ll @@ -223,14 +223,14 @@ define void @local_var_m2_with_varsize_object(i64 %n) { ; RV64IV-NEXT: andi a0, a0, -16 ; RV64IV-NEXT: sub a0, sp, a0 ; RV64IV-NEXT: mv sp, a0 -; RV64IV-NEXT: csrr a1, vlenb -; RV64IV-NEXT: slli a1, a1, 1 -; RV64IV-NEXT: sub a1, s0, a1 -; RV64IV-NEXT: addi a1, a1, -32 ; RV64IV-NEXT: csrr s1, vlenb ; RV64IV-NEXT: slli s1, s1, 1 ; RV64IV-NEXT: sub s1, s0, s1 ; RV64IV-NEXT: addi s1, s1, -32 +; RV64IV-NEXT: csrr a1, vlenb +; RV64IV-NEXT: slli a1, a1, 1 +; RV64IV-NEXT: sub a1, s0, a1 +; RV64IV-NEXT: addi a1, a1, -32 ; RV64IV-NEXT: call notdead ; RV64IV-NEXT: vl2r.v v8, (s1) ; RV64IV-NEXT: csrr a0, vlenb @@ -282,15 +282,15 @@ define void @local_var_m2_with_bp(i64 %n) { ; RV64IV-NEXT: andi a0, a0, -16 ; RV64IV-NEXT: sub a0, sp, a0 ; RV64IV-NEXT: mv sp, a0 +; RV64IV-NEXT: csrr s2, vlenb +; RV64IV-NEXT: slli s2, s2, 1 +; RV64IV-NEXT: add s2, s1, s2 +; RV64IV-NEXT: addi s2, s2, 224 ; RV64IV-NEXT: addi a1, s1, 128 ; RV64IV-NEXT: csrr a2, vlenb ; RV64IV-NEXT: slli a2, a2, 1 ; RV64IV-NEXT: add a2, s1, a2 ; RV64IV-NEXT: addi a2, a2, 224 -; RV64IV-NEXT: csrr s2, vlenb -; RV64IV-NEXT: slli s2, s2, 1 -; RV64IV-NEXT: add s2, s1, s2 -; RV64IV-NEXT: addi s2, s2, 224 ; RV64IV-NEXT: call notdead2 ; 
RV64IV-NEXT: lw zero, 124(s1) ; RV64IV-NEXT: vl2r.v v8, (s2) diff --git a/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll b/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll index 2553f563b7d0f..85b04f177f66f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memcpy-inline.ll @@ -137,12 +137,12 @@ define void @unaligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind { ; RV32-NEXT: sb a2, 6(a0) ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV32-NEXT: vle8.v v8, (a1) +; RV32-NEXT: addi a2, a0, 4 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: addi a1, a1, 4 ; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; RV32-NEXT: vle8.v v8, (a1) -; RV32-NEXT: addi a0, a0, 4 -; RV32-NEXT: vse8.v v8, (a0) +; RV32-NEXT: vse8.v v8, (a2) ; RV32-NEXT: ret ; ; RV64-LABEL: unaligned_memcpy7: @@ -151,12 +151,12 @@ define void @unaligned_memcpy7(ptr nocapture %dest, ptr %src) nounwind { ; RV64-NEXT: sb a2, 6(a0) ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64-NEXT: vle8.v v8, (a1) +; RV64-NEXT: addi a2, a0, 4 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: addi a1, a1, 4 ; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; RV64-NEXT: vle8.v v8, (a1) -; RV64-NEXT: addi a0, a0, 4 -; RV64-NEXT: vse8.v v8, (a0) +; RV64-NEXT: vse8.v v8, (a2) ; RV64-NEXT: ret ; ; RV32-FAST-LABEL: unaligned_memcpy7: @@ -223,11 +223,11 @@ define void @unaligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind { ; RV32-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; RV32-NEXT: vle8.v v8, (a2) ; RV32-NEXT: addi a2, a0, 12 +; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: vse8.v v8, (a2) ; RV32-NEXT: addi a1, a1, 8 ; RV32-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV32-NEXT: vle8.v v8, (a1) -; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; @@ -242,11 +242,11 @@ define void @unaligned_memcpy15(ptr nocapture %dest, ptr %src) nounwind { ; RV64-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; RV64-NEXT: vle8.v v8, (a2) ; RV64-NEXT: addi a2, a0, 12 +; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: vse8.v v8, (a2) ; RV64-NEXT: addi a1, a1, 8 ; RV64-NEXT: vsetivli zero, 4, e8, mf4, ta, ma ; RV64-NEXT: vle8.v v8, (a1) -; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret ; @@ -312,9 +312,9 @@ define void @unaligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind { ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vle8.v v8, (a1) ; RV32-NEXT: vse8.v v8, (a0) +; RV32-NEXT: addi a0, a0, 15 ; RV32-NEXT: addi a1, a1, 15 ; RV32-NEXT: vle8.v v8, (a1) -; RV32-NEXT: addi a0, a0, 15 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; @@ -323,9 +323,9 @@ define void @unaligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind { ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vle8.v v8, (a1) ; RV64-NEXT: vse8.v v8, (a0) +; RV64-NEXT: addi a0, a0, 15 ; RV64-NEXT: addi a1, a1, 15 ; RV64-NEXT: vle8.v v8, (a1) -; RV64-NEXT: addi a0, a0, 15 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret ; @@ -334,9 +334,9 @@ define void @unaligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind { ; RV32-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-FAST-NEXT: vle64.v v8, (a1) ; RV32-FAST-NEXT: vse64.v v8, (a0) +; RV32-FAST-NEXT: addi a0, a0, 15 ; RV32-FAST-NEXT: addi a1, a1, 15 ; RV32-FAST-NEXT: vle64.v v8, (a1) -; RV32-FAST-NEXT: addi a0, a0, 15 ; RV32-FAST-NEXT: vse64.v v8, (a0) ; RV32-FAST-NEXT: ret ; @@ -345,9 +345,9 @@ define void @unaligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind { ; RV64-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-FAST-NEXT: vle64.v v8, (a1) ; RV64-FAST-NEXT: vse64.v v8, (a0) 
+; RV64-FAST-NEXT: addi a0, a0, 15 ; RV64-FAST-NEXT: addi a1, a1, 15 ; RV64-FAST-NEXT: vle64.v v8, (a1) -; RV64-FAST-NEXT: addi a0, a0, 15 ; RV64-FAST-NEXT: vse64.v v8, (a0) ; RV64-FAST-NEXT: ret entry: @@ -459,10 +459,10 @@ define void @unaligned_memcpy96(ptr nocapture %dest, ptr %src) nounwind { ; RV32-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-FAST-NEXT: vle64.v v8, (a1) ; RV32-FAST-NEXT: vse64.v v8, (a0) +; RV32-FAST-NEXT: addi a0, a0, 64 ; RV32-FAST-NEXT: addi a1, a1, 64 ; RV32-FAST-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-FAST-NEXT: vle64.v v8, (a1) -; RV32-FAST-NEXT: addi a0, a0, 64 ; RV32-FAST-NEXT: vse64.v v8, (a0) ; RV32-FAST-NEXT: ret ; @@ -471,10 +471,10 @@ define void @unaligned_memcpy96(ptr nocapture %dest, ptr %src) nounwind { ; RV64-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-FAST-NEXT: vle64.v v8, (a1) ; RV64-FAST-NEXT: vse64.v v8, (a0) +; RV64-FAST-NEXT: addi a0, a0, 64 ; RV64-FAST-NEXT: addi a1, a1, 64 ; RV64-FAST-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-FAST-NEXT: vle64.v v8, (a1) -; RV64-FAST-NEXT: addi a0, a0, 64 ; RV64-FAST-NEXT: vse64.v v8, (a0) ; RV64-FAST-NEXT: ret entry: @@ -568,12 +568,12 @@ define void @unaligned_memcpy196(ptr nocapture %dest, ptr %src) nounwind { ; RV32-FAST-NEXT: sw a2, 192(a0) ; RV32-FAST-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-FAST-NEXT: vle64.v v8, (a1) +; RV32-FAST-NEXT: addi a2, a0, 128 ; RV32-FAST-NEXT: vse64.v v8, (a0) -; RV32-FAST-NEXT: addi a1, a1, 128 +; RV32-FAST-NEXT: addi a0, a1, 128 ; RV32-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-FAST-NEXT: vle64.v v8, (a1) -; RV32-FAST-NEXT: addi a0, a0, 128 -; RV32-FAST-NEXT: vse64.v v8, (a0) +; RV32-FAST-NEXT: vle64.v v8, (a0) +; RV32-FAST-NEXT: vse64.v v8, (a2) ; RV32-FAST-NEXT: ret ; ; RV64-FAST-LABEL: unaligned_memcpy196: @@ -582,12 +582,12 @@ define void @unaligned_memcpy196(ptr nocapture %dest, ptr %src) nounwind { ; RV64-FAST-NEXT: sw a2, 192(a0) ; RV64-FAST-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-FAST-NEXT: vle64.v v8, (a1) +; RV64-FAST-NEXT: addi a2, a0, 128 ; RV64-FAST-NEXT: vse64.v v8, (a0) -; RV64-FAST-NEXT: addi a1, a1, 128 +; RV64-FAST-NEXT: addi a0, a1, 128 ; RV64-FAST-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-FAST-NEXT: vle64.v v8, (a1) -; RV64-FAST-NEXT: addi a0, a0, 128 -; RV64-FAST-NEXT: vse64.v v8, (a0) +; RV64-FAST-NEXT: vle64.v v8, (a0) +; RV64-FAST-NEXT: vse64.v v8, (a2) ; RV64-FAST-NEXT: ret entry: tail call void @llvm.memcpy.inline.p0.p0.i64(ptr %dest, ptr %src, i64 196, i1 false) @@ -624,9 +624,9 @@ define void @unaligned_memcpy256(ptr nocapture %dest, ptr %src) nounwind { ; RV32-FAST-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-FAST-NEXT: vle64.v v8, (a1) ; RV32-FAST-NEXT: vse64.v v8, (a0) +; RV32-FAST-NEXT: addi a0, a0, 128 ; RV32-FAST-NEXT: addi a1, a1, 128 ; RV32-FAST-NEXT: vle64.v v8, (a1) -; RV32-FAST-NEXT: addi a0, a0, 128 ; RV32-FAST-NEXT: vse64.v v8, (a0) ; RV32-FAST-NEXT: ret ; @@ -635,9 +635,9 @@ define void @unaligned_memcpy256(ptr nocapture %dest, ptr %src) nounwind { ; RV64-FAST-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-FAST-NEXT: vle64.v v8, (a1) ; RV64-FAST-NEXT: vse64.v v8, (a0) +; RV64-FAST-NEXT: addi a0, a0, 128 ; RV64-FAST-NEXT: addi a1, a1, 128 ; RV64-FAST-NEXT: vle64.v v8, (a1) -; RV64-FAST-NEXT: addi a0, a0, 128 ; RV64-FAST-NEXT: vse64.v v8, (a0) ; RV64-FAST-NEXT: ret entry: @@ -837,10 +837,10 @@ define void @aligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind { ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vle64.v v8, (a1) ; RV32-NEXT: vse64.v v8, (a0) +; 
RV32-NEXT: addi a0, a0, 15 ; RV32-NEXT: addi a1, a1, 15 ; RV32-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV32-NEXT: vle8.v v8, (a1) -; RV32-NEXT: addi a0, a0, 15 ; RV32-NEXT: vse8.v v8, (a0) ; RV32-NEXT: ret ; @@ -849,10 +849,10 @@ define void @aligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind { ; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vle64.v v8, (a1) ; RV64-NEXT: vse64.v v8, (a0) +; RV64-NEXT: addi a0, a0, 15 ; RV64-NEXT: addi a1, a1, 15 ; RV64-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; RV64-NEXT: vle8.v v8, (a1) -; RV64-NEXT: addi a0, a0, 15 ; RV64-NEXT: vse8.v v8, (a0) ; RV64-NEXT: ret ; @@ -861,9 +861,9 @@ define void @aligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind { ; RV32-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-FAST-NEXT: vle64.v v8, (a1) ; RV32-FAST-NEXT: vse64.v v8, (a0) +; RV32-FAST-NEXT: addi a0, a0, 15 ; RV32-FAST-NEXT: addi a1, a1, 15 ; RV32-FAST-NEXT: vle64.v v8, (a1) -; RV32-FAST-NEXT: addi a0, a0, 15 ; RV32-FAST-NEXT: vse64.v v8, (a0) ; RV32-FAST-NEXT: ret ; @@ -872,9 +872,9 @@ define void @aligned_memcpy31(ptr nocapture %dest, ptr %src) nounwind { ; RV64-FAST-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-FAST-NEXT: vle64.v v8, (a1) ; RV64-FAST-NEXT: vse64.v v8, (a0) +; RV64-FAST-NEXT: addi a0, a0, 15 ; RV64-FAST-NEXT: addi a1, a1, 15 ; RV64-FAST-NEXT: vle64.v v8, (a1) -; RV64-FAST-NEXT: addi a0, a0, 15 ; RV64-FAST-NEXT: vse64.v v8, (a0) ; RV64-FAST-NEXT: ret entry: @@ -926,10 +926,10 @@ define void @aligned_memcpy96(ptr nocapture %dest, ptr %src) nounwind { ; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV32-BOTH-NEXT: vle64.v v8, (a1) ; RV32-BOTH-NEXT: vse64.v v8, (a0) +; RV32-BOTH-NEXT: addi a0, a0, 64 ; RV32-BOTH-NEXT: addi a1, a1, 64 ; RV32-BOTH-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-BOTH-NEXT: vle64.v v8, (a1) -; RV32-BOTH-NEXT: addi a0, a0, 64 ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; @@ -938,10 +938,10 @@ define void @aligned_memcpy96(ptr nocapture %dest, ptr %src) nounwind { ; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; RV64-BOTH-NEXT: vle64.v v8, (a1) ; RV64-BOTH-NEXT: vse64.v v8, (a0) +; RV64-BOTH-NEXT: addi a0, a0, 64 ; RV64-BOTH-NEXT: addi a1, a1, 64 ; RV64-BOTH-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-BOTH-NEXT: vle64.v v8, (a1) -; RV64-BOTH-NEXT: addi a0, a0, 64 ; RV64-BOTH-NEXT: vse64.v v8, (a0) ; RV64-BOTH-NEXT: ret entry: @@ -975,12 +975,12 @@ define void @aligned_memcpy196(ptr nocapture %dest, ptr %src) nounwind { ; RV32-BOTH-NEXT: sw a2, 192(a0) ; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-BOTH-NEXT: vle64.v v8, (a1) +; RV32-BOTH-NEXT: addi a2, a0, 128 ; RV32-BOTH-NEXT: vse64.v v8, (a0) -; RV32-BOTH-NEXT: addi a1, a1, 128 +; RV32-BOTH-NEXT: addi a0, a1, 128 ; RV32-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV32-BOTH-NEXT: vle64.v v8, (a1) -; RV32-BOTH-NEXT: addi a0, a0, 128 -; RV32-BOTH-NEXT: vse64.v v8, (a0) +; RV32-BOTH-NEXT: vle64.v v8, (a0) +; RV32-BOTH-NEXT: vse64.v v8, (a2) ; RV32-BOTH-NEXT: ret ; ; RV64-BOTH-LABEL: aligned_memcpy196: @@ -989,12 +989,12 @@ define void @aligned_memcpy196(ptr nocapture %dest, ptr %src) nounwind { ; RV64-BOTH-NEXT: sw a2, 192(a0) ; RV64-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-BOTH-NEXT: vle64.v v8, (a1) +; RV64-BOTH-NEXT: addi a2, a0, 128 ; RV64-BOTH-NEXT: vse64.v v8, (a0) -; RV64-BOTH-NEXT: addi a1, a1, 128 +; RV64-BOTH-NEXT: addi a0, a1, 128 ; RV64-BOTH-NEXT: vsetivli zero, 8, e64, m4, ta, ma -; RV64-BOTH-NEXT: vle64.v v8, (a1) -; RV64-BOTH-NEXT: addi a0, a0, 128 -; RV64-BOTH-NEXT: vse64.v 
v8, (a0) +; RV64-BOTH-NEXT: vle64.v v8, (a0) +; RV64-BOTH-NEXT: vse64.v v8, (a2) ; RV64-BOTH-NEXT: ret entry: tail call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 %dest, ptr align 8 %src, i64 196, i1 false) @@ -1007,9 +1007,9 @@ define void @aligned_memcpy256(ptr nocapture %dest, ptr %src) nounwind { ; RV32-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV32-BOTH-NEXT: vle64.v v8, (a1) ; RV32-BOTH-NEXT: vse64.v v8, (a0) +; RV32-BOTH-NEXT: addi a0, a0, 128 ; RV32-BOTH-NEXT: addi a1, a1, 128 ; RV32-BOTH-NEXT: vle64.v v8, (a1) -; RV32-BOTH-NEXT: addi a0, a0, 128 ; RV32-BOTH-NEXT: vse64.v v8, (a0) ; RV32-BOTH-NEXT: ret ; @@ -1018,9 +1018,9 @@ define void @aligned_memcpy256(ptr nocapture %dest, ptr %src) nounwind { ; RV64-BOTH-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; RV64-BOTH-NEXT: vle64.v v8, (a1) ; RV64-BOTH-NEXT: vse64.v v8, (a0) +; RV64-BOTH-NEXT: addi a0, a0, 128 ; RV64-BOTH-NEXT: addi a1, a1, 128 ; RV64-BOTH-NEXT: vle64.v v8, (a1) -; RV64-BOTH-NEXT: addi a0, a0, 128 ; RV64-BOTH-NEXT: vse64.v v8, (a0) ; RV64-BOTH-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll index 8190a82d7035b..f4502ee0fa8f4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/memory-args.ll +++ b/llvm/test/CodeGen/RISCV/rvv/memory-args.ll @@ -55,9 +55,9 @@ define @caller() { ; RV64IV-NEXT: add a0, sp, a0 ; RV64IV-NEXT: addi a0, a0, 64 ; RV64IV-NEXT: vl8r.v v24, (a0) -; RV64IV-NEXT: addi a1, sp, 64 ; RV64IV-NEXT: addi a0, sp, 64 -; RV64IV-NEXT: vs8r.v v24, (a1) +; RV64IV-NEXT: vs8r.v v24, (a0) +; RV64IV-NEXT: addi a0, sp, 64 ; RV64IV-NEXT: call callee ; RV64IV-NEXT: addi sp, s0, -80 ; RV64IV-NEXT: .cfi_def_cfa sp, 80 diff --git a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll index 0fad09f27007c..893658ebb1901 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mgather-sdnode.ll @@ -1221,12 +1221,12 @@ define void @mgather_nxv16i64( %ptrs0, %ptr ; RV32: # %bb.0: ; RV32-NEXT: vl8re64.v v24, (a0) ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu +; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t ; RV32-NEXT: srli a2, a0, 3 ; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v7, v0, a2 +; RV32-NEXT: vslidedown.vx v0, v0, a2 ; RV32-NEXT: vsetvli a2, zero, e64, m8, ta, mu -; RV32-NEXT: vluxei32.v v16, (zero), v8, v0.t -; RV32-NEXT: vmv1r.v v0, v7 ; RV32-NEXT: vluxei32.v v24, (zero), v12, v0.t ; RV32-NEXT: slli a0, a0, 3 ; RV32-NEXT: add a0, a1, a0 @@ -1236,37 +1236,20 @@ define void @mgather_nxv16i64( %ptrs0, %ptr ; ; RV64-LABEL: mgather_nxv16i64: ; RV64: # %bb.0: -; RV64-NEXT: addi sp, sp, -16 -; RV64-NEXT: .cfi_def_cfa_offset 16 -; RV64-NEXT: csrr a3, vlenb -; RV64-NEXT: slli a3, a3, 3 -; RV64-NEXT: sub sp, sp, a3 -; RV64-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV64-NEXT: vmv8r.v v16, v8 ; RV64-NEXT: vl8re64.v v24, (a0) -; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: vsetvli a0, zero, e64, m8, ta, mu +; RV64-NEXT: vluxei64.v v24, (zero), v8, v0.t ; RV64-NEXT: vl8re64.v v8, (a1) +; RV64-NEXT: csrr a0, vlenb ; RV64-NEXT: srli a1, a0, 3 -; RV64-NEXT: vslidedown.vx v7, v0, a1 +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, mu -; 
RV64-NEXT: vluxei64.v v24, (zero), v16, v0.t -; RV64-NEXT: vmv1r.v v0, v7 -; RV64-NEXT: addi a1, sp, 16 -; RV64-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vluxei64.v v8, (zero), v16, v0.t ; RV64-NEXT: slli a0, a0, 3 ; RV64-NEXT: add a0, a2, a0 ; RV64-NEXT: vs8r.v v8, (a0) ; RV64-NEXT: vs8r.v v24, (a2) -; RV64-NEXT: csrr a0, vlenb -; RV64-NEXT: slli a0, a0, 3 -; RV64-NEXT: add sp, sp, a0 -; RV64-NEXT: .cfi_def_cfa sp, 16 -; RV64-NEXT: addi sp, sp, 16 -; RV64-NEXT: .cfi_def_cfa_offset 0 ; RV64-NEXT: ret %p0 = call @llvm.vector.insert.nxv8p0.nxv16p0( undef, %ptrs0, i64 0) %p1 = call @llvm.vector.insert.nxv8p0.nxv16p0( %p0, %ptrs1, i64 8) @@ -2347,12 +2330,12 @@ define @mgather_baseidx_nxv32i8(ptr %base, ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v12, (a0), v24, v0.t +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma +; RV64-NEXT: vsext.vf8 v24, v9 ; RV64-NEXT: srli a2, a1, 3 ; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma ; RV64-NEXT: vslidedown.vx v0, v0, a2 -; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf8 v24, v9 -; RV64-NEXT: vsetvli zero, zero, e8, m1, ta, mu +; RV64-NEXT: vsetvli a3, zero, e8, m1, ta, mu ; RV64-NEXT: vluxei64.v v13, (a0), v24, v0.t ; RV64-NEXT: srli a1, a1, 2 ; RV64-NEXT: vsetvli a3, zero, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll index 3cf7cc9cb5152..cd6f76a79373f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/mscatter-sdnode.ll @@ -1904,57 +1904,25 @@ define void @mscatter_nxv16f64( %val0, @reverse_nxv3i64( %a) { ; CHECK-LABEL: reverse_nxv3i64: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: addi a0, a0, -1 ; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma ; CHECK-NEXT: vid.v v12 -; CHECK-NEXT: vrsub.vx v14, v12, a0 -; CHECK-NEXT: vrgather.vv v13, v10, v14 -; CHECK-NEXT: vrgather.vv v10, v9, v14 -; CHECK-NEXT: vmv.v.v v12, v13 -; CHECK-NEXT: vrgather.vv v15, v8, v14 -; CHECK-NEXT: vmv.v.v v13, v10 -; CHECK-NEXT: vrgather.vv v8, v11, v14 -; CHECK-NEXT: vmv.v.v v14, v15 -; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: addi a0, a0, -1 +; CHECK-NEXT: vrsub.vx v12, v12, a0 +; CHECK-NEXT: vrgather.vv v15, v8, v12 +; CHECK-NEXT: vrgather.vv v14, v9, v12 +; CHECK-NEXT: vrgather.vv v9, v10, v12 +; CHECK-NEXT: vrgather.vv v8, v11, v12 +; CHECK-NEXT: vmv.v.v v8, v9 +; CHECK-NEXT: vmv.v.v v9, v14 +; CHECK-NEXT: vmv.v.v v10, v15 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv3i64( %a) ret %res @@ -1969,19 +1968,18 @@ define @reverse_nxv6i64( %a) { ; CHECK-NEXT: vid.v v16 ; CHECK-NEXT: srli a0, a0, 3 ; CHECK-NEXT: addi a0, a0, -1 -; CHECK-NEXT: vrsub.vx v22, v16, a0 -; CHECK-NEXT: vrgather.vv v21, v10, v22 -; CHECK-NEXT: vrgather.vv v19, v12, v22 -; CHECK-NEXT: vrgather.vv v18, v13, v22 -; CHECK-NEXT: vrgather.vv v20, v11, v22 -; CHECK-NEXT: vmv2r.v v16, v18 -; CHECK-NEXT: vmv2r.v v18, v20 -; CHECK-NEXT: vrgather.vv v31, v8, v22 -; CHECK-NEXT: vrgather.vv v30, v9, v22 -; CHECK-NEXT: vrgather.vv v9, v14, v22 -; CHECK-NEXT: vrgather.vv v8, v15, v22 -; CHECK-NEXT: vmv2r.v v20, v30 -; CHECK-NEXT: vmv8r.v v8, v16 +; CHECK-NEXT: vrsub.vx v16, v16, a0 +; CHECK-NEXT: vrgather.vv v23, v8, v16 +; CHECK-NEXT: vrgather.vv v21, v10, v16 +; CHECK-NEXT: vrgather.vv v22, v9, v16 +; CHECK-NEXT: vrgather.vv v20, v11, v16 +; CHECK-NEXT: vrgather.vv v11, v12, v16 +; CHECK-NEXT: vrgather.vv v10, v13, 
v16 +; CHECK-NEXT: vrgather.vv v9, v14, v16 +; CHECK-NEXT: vrgather.vv v8, v15, v16 +; CHECK-NEXT: vmv2r.v v8, v10 +; CHECK-NEXT: vmv2r.v v10, v20 +; CHECK-NEXT: vmv2r.v v12, v22 ; CHECK-NEXT: ret %res = call @llvm.vector.reverse.nxv6i64( %a) ret %res diff --git a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll index 937b3e6636df8..12042975b5adf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/nearbyint-vp.ll @@ -22,22 +22,22 @@ define @vp_nearbyint_nxv1bf16( %va, < ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v11, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv1bf16( %va, %m, i32 %evl) ret %v @@ -49,18 +49,18 @@ define @vp_nearbyint_nxv1bf16_unmasked( @llvm.vp.nearbyint.nxv1bf16( %va, splat (i1 true), i32 %evl) ret %v @@ -76,22 +76,22 @@ define @vp_nearbyint_nxv2bf16( %va, < ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v11, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v11, v11, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v11, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv2bf16( %va, %m, i32 %evl) ret %v @@ -103,18 +103,18 @@ define @vp_nearbyint_nxv2bf16_unmasked( @llvm.vp.nearbyint.nxv2bf16( %va, splat (i1 true), i32 %evl) ret %v @@ -130,22 +130,22 @@ define @vp_nearbyint_nxv4bf16( %va, < ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v10, v12, v10, v0.t ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10, 
v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv4bf16( %va, %m, i32 %evl) ret %v @@ -157,18 +157,18 @@ define @vp_nearbyint_nxv4bf16_unmasked( @llvm.vp.nearbyint.nxv4bf16( %va, splat (i1 true), i32 %evl) ret %v @@ -184,22 +184,22 @@ define @vp_nearbyint_nxv8bf16( %va, < ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v12, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v12, v16, v12, v0.t ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv8bf16( %va, %m, i32 %evl) ret %v @@ -211,18 +211,18 @@ define @vp_nearbyint_nxv8bf16_unmasked( @llvm.vp.nearbyint.nxv8bf16( %va, splat (i1 true), i32 %evl) ret %v @@ -238,22 +238,22 @@ define @vp_nearbyint_nxv16bf16( %va ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16bf16( %va, %m, i32 %evl) ret %v @@ -265,18 +265,18 @@ define @vp_nearbyint_nxv16bf16_unmasked( @llvm.vp.nearbyint.nxv16bf16( %va, splat (i1 true), i32 %evl) ret %v @@ -297,6 +297,7 @@ define @vp_nearbyint_nxv32bf16( %va ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: lui a3, 307200 +; CHECK-NEXT: frflags a4 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 @@ -315,12 +316,11 @@ define @vp_nearbyint_nxv32bf16( %va ; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v18, v8, fa5, v0.t -; CHECK-NEXT: frflags a2 ; CHECK-NEXT: vmv1r.v v0, v18 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t -; CHECK-NEXT: fsflags a2 +; CHECK-NEXT: fsflags a4 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; CHECK-NEXT: vmv1r.v v0, v17 @@ -336,21 +336,21 @@ define @vp_nearbyint_nxv32bf16( %va ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t ; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; 
CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v16, v24, v0.t ; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; CHECK-NEXT: vfncvtbf16.f.f.w v8, v24, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add sp, sp, a0 @@ -375,11 +375,12 @@ define @vp_nearbyint_nxv32bf16_unmasked( @vp_nearbyint_nxv32bf16_unmasked( @vp_nearbyint_nxv32bf16_unmasked( @llvm.vp.nearbyint.nxv1f16(, @vp_nearbyint_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a0) +; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv1f16: @@ -461,22 +461,22 @@ define @vp_nearbyint_nxv1f16( %va, @llvm.vp.nearbyint.nxv1f16( %va, %m, i32 %evl) ret %v @@ -485,17 +485,17 @@ define @vp_nearbyint_nxv1f16( %va, @vp_nearbyint_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a0) ; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv1f16_unmasked: @@ -503,18 +503,18 @@ define @vp_nearbyint_nxv1f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv1f16( %va, splat (i1 true), i32 %evl) ret %v @@ -525,19 +525,19 @@ declare @llvm.vp.nearbyint.nxv2f16(, @vp_nearbyint_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; 
ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI14_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a0) +; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv2f16: @@ -547,22 +547,22 @@ define @vp_nearbyint_nxv2f16( %va, @llvm.vp.nearbyint.nxv2f16( %va, %m, i32 %evl) ret %v @@ -571,17 +571,17 @@ define @vp_nearbyint_nxv2f16( %va, @vp_nearbyint_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI15_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a0) ; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv2f16_unmasked: @@ -589,18 +589,18 @@ define @vp_nearbyint_nxv2f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v9, v8, v9, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v9 -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv2f16( %va, splat (i1 true), i32 %evl) ret %v @@ -611,19 +611,19 @@ declare @llvm.vp.nearbyint.nxv4f16(, @vp_nearbyint_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a0) +; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: frflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv4f16: @@ -633,22 +633,22 @@ define @vp_nearbyint_nxv4f16( %va, @llvm.vp.nearbyint.nxv4f16( %va, %m, i32 %evl) ret %v @@ -657,17 +657,17 @@ define @vp_nearbyint_nxv4f16( %va, @vp_nearbyint_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv4f16_unmasked: ; ZVFH: # 
%bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a0) ; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv4f16_unmasked: @@ -675,18 +675,18 @@ define @vp_nearbyint_nxv4f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v10, v8, v10, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v10 -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv4f16( %va, splat (i1 true), i32 %evl) ret %v @@ -699,19 +699,19 @@ define @vp_nearbyint_nxv8f16( %va, @vp_nearbyint_nxv8f16( %va, @llvm.vp.nearbyint.nxv8f16( %va, %m, i32 %evl) ret %v @@ -745,17 +745,17 @@ define @vp_nearbyint_nxv8f16( %va, @vp_nearbyint_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI19_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a0) ; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m2, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv8f16_unmasked: @@ -763,18 +763,18 @@ define @vp_nearbyint_nxv8f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v12, v8, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v12 -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv8f16( %va, splat (i1 true), i32 %evl) ret %v @@ -787,19 +787,19 @@ define @vp_nearbyint_nxv16f16( %va, @vp_nearbyint_nxv16f16( %va, @llvm.vp.nearbyint.nxv16f16( %va, %m, i32 %evl) ret %v @@ -833,17 +833,17 @@ define @vp_nearbyint_nxv16f16( %va, @vp_nearbyint_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv16f16_unmasked: ; ZVFH: # 
%bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a0) ; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv16f16_unmasked: @@ -851,18 +851,18 @@ define @vp_nearbyint_nxv16f16_unmasked( ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v8, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16f16( %va, splat (i1 true), i32 %evl) ret %v @@ -875,19 +875,19 @@ define @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16( %va, @vp_nearbyint_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_nearbyint_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a0) ; ZVFH-NEXT: frflags a0 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t +; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; ZVFH-NEXT: fsflags a0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: vp_nearbyint_nxv32f16_unmasked: @@ -995,11 +995,12 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 +; ZVFHMIN-NEXT: frflags a4 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: vmv1r.v v17, v16 @@ -1014,12 +1015,11 @@ define @vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t -; ZVFHMIN-NEXT: frflags a2 ; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t -; ZVFHMIN-NEXT: fsflags a2 +; ZVFHMIN-NEXT: fsflags a4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t ; ZVFHMIN-NEXT: vmv1r.v v0, v16 @@ -1033,17 +1033,17 @@ define 
@vp_nearbyint_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: frflags a0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t +; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v16, v24, v16, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfncvt.f.f.w v8, v16 -; ZVFHMIN-NEXT: fsflags a0 ; ZVFHMIN-NEXT: csrr a0, vlenb ; ZVFHMIN-NEXT: slli a0, a0, 3 ; ZVFHMIN-NEXT: add sp, sp, a0 @@ -1064,15 +1064,15 @@ define @vp_nearbyint_nxv1f32( %va, @llvm.vp.nearbyint.nxv1f32( %va, %m, i32 %evl) ret %v @@ -1085,13 +1085,13 @@ define @vp_nearbyint_nxv1f32_unmasked( ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv1f32( %va, splat (i1 true), i32 %evl) ret %v @@ -1106,15 +1106,15 @@ define @vp_nearbyint_nxv2f32( %va, @llvm.vp.nearbyint.nxv2f32( %va, %m, i32 %evl) ret %v @@ -1127,13 +1127,13 @@ define @vp_nearbyint_nxv2f32_unmasked( ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv2f32( %va, splat (i1 true), i32 %evl) ret %v @@ -1149,16 +1149,16 @@ define @vp_nearbyint_nxv4f32( %va, @llvm.vp.nearbyint.nxv4f32( %va, %m, i32 %evl) ret %v @@ -1171,13 +1171,13 @@ define @vp_nearbyint_nxv4f32_unmasked( ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv4f32( %va, splat (i1 true), i32 %evl) ret %v @@ -1193,16 +1193,16 @@ define @vp_nearbyint_nxv8f32( %va, @llvm.vp.nearbyint.nxv8f32( %va, %m, i32 %evl) ret %v @@ -1215,13 +1215,13 @@ define @vp_nearbyint_nxv8f32_unmasked( ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv8f32( %va, splat (i1 true), i32 %evl) ret %v @@ -1237,16 +1237,16 @@ define 
@vp_nearbyint_nxv16f32( %va, < ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v24, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv16f32( %va, %m, i32 %evl) ret %v @@ -1259,13 +1259,13 @@ define @vp_nearbyint_nxv16f32_unmasked( @llvm.vp.nearbyint.nxv16f32( %va, splat (i1 true), i32 %evl) ret %v @@ -1276,19 +1276,19 @@ declare @llvm.vp.nearbyint.nxv1f64(, define @vp_nearbyint_nxv1f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a0) +; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: frflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv1f64( %va, %m, i32 %evl) ret %v @@ -1297,17 +1297,17 @@ define @vp_nearbyint_nxv1f64( %va, @vp_nearbyint_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v9, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv1f64( %va, splat (i1 true), i32 %evl) ret %v @@ -1320,19 +1320,19 @@ define @vp_nearbyint_nxv2f64( %va, @llvm.vp.nearbyint.nxv2f64( %va, %m, i32 %evl) ret %v @@ -1341,17 +1341,17 @@ define @vp_nearbyint_nxv2f64( %va, @vp_nearbyint_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v10, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv2f64( %va, splat (i1 true), i32 %evl) ret %v @@ -1364,19 +1364,19 @@ define @vp_nearbyint_nxv4f64( %va, 
@llvm.vp.nearbyint.nxv4f64( %va, %m, i32 %evl) ret %v @@ -1385,17 +1385,17 @@ define @vp_nearbyint_nxv4f64( %va, @vp_nearbyint_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v12, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv4f64( %va, splat (i1 true), i32 %evl) ret %v @@ -1408,19 +1408,19 @@ define @vp_nearbyint_nxv7f64( %va, @llvm.vp.nearbyint.nxv7f64( %va, %m, i32 %evl) ret %v @@ -1429,17 +1429,17 @@ define @vp_nearbyint_nxv7f64( %va, @vp_nearbyint_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv7f64( %va, splat (i1 true), i32 %evl) ret %v @@ -1452,19 +1452,19 @@ define @vp_nearbyint_nxv8f64( %va, @llvm.vp.nearbyint.nxv8f64( %va, %m, i32 %evl) ret %v @@ -1473,17 +1473,17 @@ define @vp_nearbyint_nxv8f64( %va, @vp_nearbyint_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_nearbyint_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a0) ; CHECK-NEXT: frflags a0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: ret %v = call @llvm.vp.nearbyint.nxv8f64( %va, splat (i1 true), i32 %evl) ret %v @@ -1498,59 +1498,66 @@ define @vp_nearbyint_nxv16f64( %va, ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; 
CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v25, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: frflags a3 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: frflags a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: fsflags a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t +; CHECK-NEXT: fsflags a3 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t ; CHECK-NEXT: frflags a0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t +; CHECK-NEXT: fsflags a0 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: fsflags a0 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -1570,13 +1577,13 @@ define @vp_nearbyint_nxv16f64_unmasked( @vp_nearbyint_nxv16f64_unmasked( @llvm.vp.nearbyint.nxv16f64( %va, splat (i1 true), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll index feb96deb920ff..b83439f6baa22 100644 --- a/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll +++ b/llvm/test/CodeGen/RISCV/rvv/no-reserved-frame.ll @@ -19,24 +19,23 @@ define signext i32 @foo(i32 signext %aa) #0 { ; CHECK-NEXT: sub sp, sp, a1 ; CHECK-NEXT: andi sp, sp, -16 ; CHECK-NEXT: mv s1, sp -; CHECK-NEXT: lw t0, 44(s1) +; CHECK-NEXT: sw a0, 52(s1) +; CHECK-NEXT: sw a0, 48(s1) +; CHECK-NEXT: lw a0, 44(s1) ; CHECK-NEXT: lw a2, 40(s1) ; 
CHECK-NEXT: lw a3, 36(s1) ; CHECK-NEXT: lw a4, 32(s1) ; CHECK-NEXT: lw a5, 28(s1) ; CHECK-NEXT: lw a6, 24(s1) ; CHECK-NEXT: lw a7, 20(s1) -; CHECK-NEXT: lw t1, 16(s1) -; CHECK-NEXT: lw t2, 12(s1) -; CHECK-NEXT: lw t3, 8(s1) -; CHECK-NEXT: sw a0, 52(s1) -; CHECK-NEXT: sw a0, 48(s1) +; CHECK-NEXT: lw a1, 16(s1) +; CHECK-NEXT: lw t0, 12(s1) +; CHECK-NEXT: lw t1, 8(s1) ; CHECK-NEXT: addi sp, sp, -32 +; CHECK-NEXT: sd a1, 0(sp) +; CHECK-NEXT: sd t0, 8(sp) +; CHECK-NEXT: sd t1, 16(sp) ; CHECK-NEXT: addi a1, s1, 48 -; CHECK-NEXT: sd t1, 0(sp) -; CHECK-NEXT: sd t2, 8(sp) -; CHECK-NEXT: sd t3, 16(sp) -; CHECK-NEXT: mv a0, t0 ; CHECK-NEXT: call gfunc ; CHECK-NEXT: addi sp, sp, 32 ; CHECK-NEXT: li a0, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/pr125306.ll b/llvm/test/CodeGen/RISCV/rvv/pr125306.ll index 111f87de220db..f3ac76eaace6f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr125306.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr125306.ll @@ -19,57 +19,57 @@ define <2 x i32> @main(ptr %0) { ; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: li a2, 64 +; CHECK-NEXT: li a1, 64 ; CHECK-NEXT: sw zero, 80(zero) -; CHECK-NEXT: lui a1, 7 +; CHECK-NEXT: lui a2, 7 ; CHECK-NEXT: lui a3, 1 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vid.v v11 ; CHECK-NEXT: li a4, 16 ; CHECK-NEXT: lui a5, 2 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vse32.v v10, (a2) +; CHECK-NEXT: vse32.v v10, (a1) ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: li a2, 24 +; CHECK-NEXT: li a1, 24 ; CHECK-NEXT: sh zero, -392(a3) ; CHECK-NEXT: sh zero, 534(a3) ; CHECK-NEXT: sh zero, 1460(a3) ; CHECK-NEXT: li a3, 32 -; CHECK-NEXT: vse32.v v10, (a2) -; CHECK-NEXT: li a2, 40 +; CHECK-NEXT: vse32.v v10, (a1) +; CHECK-NEXT: li a1, 40 ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vse32.v v8, (a0) +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vadd.vi v9, v11, -1 ; CHECK-NEXT: sh zero, -1710(a5) ; CHECK-NEXT: sh zero, -784(a5) ; CHECK-NEXT: sh zero, 142(a5) -; CHECK-NEXT: lw a5, -304(a1) -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vadd.vi v9, v11, -1 ; CHECK-NEXT: vse32.v v10, (a3) ; CHECK-NEXT: sh zero, 0(a0) -; CHECK-NEXT: lw a0, -188(a1) -; CHECK-NEXT: vse32.v v10, (a2) -; CHECK-NEXT: lw a2, -188(a1) -; CHECK-NEXT: lw a3, 1244(a1) -; CHECK-NEXT: vmv.v.x v8, a0 -; CHECK-NEXT: lw a0, 1244(a1) -; CHECK-NEXT: lw a1, -304(a1) -; CHECK-NEXT: vmv.v.x v10, a3 -; CHECK-NEXT: vmv.v.x v11, a5 +; CHECK-NEXT: vse32.v v10, (a1) +; CHECK-NEXT: lw a0, 1244(a2) +; CHECK-NEXT: lw a1, 1244(a2) +; CHECK-NEXT: lw a3, -188(a2) +; CHECK-NEXT: lw a5, -188(a2) +; CHECK-NEXT: vmv.v.x v8, a3 +; CHECK-NEXT: lw a3, -304(a2) +; CHECK-NEXT: lw a2, -304(a2) +; CHECK-NEXT: sh zero, 0(zero) +; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vmv.v.x v11, a3 ; CHECK-NEXT: vslide1down.vx v8, v8, zero ; CHECK-NEXT: vslide1down.vx v10, v10, zero ; CHECK-NEXT: vmin.vv v8, v10, v8 -; CHECK-NEXT: vmv.v.x v10, a0 +; CHECK-NEXT: vmv.v.x v10, a1 ; CHECK-NEXT: vslide1down.vx v11, v11, zero +; CHECK-NEXT: vmin.vx v10, v10, a5 ; CHECK-NEXT: vmin.vx v10, v10, a2 -; CHECK-NEXT: vmin.vx v10, v10, a1 ; CHECK-NEXT: vmin.vv v11, v8, v11 ; CHECK-NEXT: vmv1r.v v8, v10 ; CHECK-NEXT: vand.vv v9, v11, v9 -; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: vse32.v v9, (a4) -; CHECK-NEXT: sh zero, 0(zero) +; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: ret entry: store <16 x i32> zeroinitializer, ptr null, align 4 
diff --git a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll index dbd4224c7ef08..d09b200485092 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr63596.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr63596.ll @@ -18,12 +18,11 @@ define <4 x float> @foo(ptr %0) nounwind { ; CHECK-NEXT: lhu a0, 6(a0) ; CHECK-NEXT: fmv.w.x fa0, a0 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: fmv.w.x fa5, s2 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: fmv.s fa0, fa5 +; CHECK-NEXT: fmv.w.x fa0, s2 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 @@ -36,12 +35,11 @@ define <4 x float> @foo(ptr %0) nounwind { ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: fmv.w.x fa0, s1 ; CHECK-NEXT: call __extendhfsf2 -; CHECK-NEXT: fmv.w.x fa5, s0 ; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 ; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: fmv.s fa0, fa5 +; CHECK-NEXT: fmv.w.x fa0, s0 ; CHECK-NEXT: call __extendhfsf2 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vfmv.s.f v8, fa0 diff --git a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll index 06a357eeaeb61..4be681ec51234 100644 --- a/llvm/test/CodeGen/RISCV/rvv/pr95865.ll +++ b/llvm/test/CodeGen/RISCV/rvv/pr95865.ll @@ -105,8 +105,8 @@ define i32 @main(i1 %arg.1, i64 %arg.2, i1 %arg.3, i64 %arg.4, i1 %arg.5, @vp_round_nxv1bf16( %va, @vp_round_nxv1bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -76,12 +76,12 @@ define @vp_round_nxv2bf16( %va, @vp_round_nxv2bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v8, v9 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v9, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -130,12 +130,12 @@ define @vp_round_nxv4bf16( %va, @vp_round_nxv4bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v8, v10 -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v10, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -184,12 +184,12 @@ define @vp_round_nxv8bf16( %va, @vp_round_nxv8bf16_unmasked( % ; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8 ; CHECK-NEXT: lui a0, 307200 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v8, v12 -; CHECK-NEXT: fmv.w.x 
fa5, a0 ; CHECK-NEXT: vmflt.vf v0, v8, fa5 -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vfcvt.x.f.v v8, v12, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -238,12 +238,12 @@ define @vp_round_nxv16bf16( %va, @vp_round_nxv16bf16_unmasked( @vp_round_nxv32bf16( %va, @vp_round_nxv32bf16( %va, @vp_round_nxv32bf16( %va, @vp_round_nxv32bf16_unmasked( @vp_round_nxv32bf16_unmasked( @vp_round_nxv32bf16_unmasked( @llvm.vp.round.nxv1f16(, @vp_round_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a0) +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -461,12 +461,12 @@ define @vp_round_nxv1f16( %va, @vp_round_nxv1f16( %va, @vp_round_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -503,11 +503,11 @@ define @vp_round_nxv1f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -525,13 +525,13 @@ declare @llvm.vp.round.nxv2f16(, @vp_round_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI14_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a0) +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -547,12 +547,12 @@ define @vp_round_nxv2f16( %va, @vp_round_nxv2f16( %va, @vp_round_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI15_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -589,11 +589,11 @@ define @vp_round_nxv2f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, 
ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -611,13 +611,13 @@ declare @llvm.vp.round.nxv4f16(, @vp_round_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a0) +; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 4 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -633,12 +633,12 @@ define @vp_round_nxv4f16( %va, @vp_round_nxv4f16( %va, @vp_round_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -675,11 +675,11 @@ define @vp_round_nxv4f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -699,12 +699,12 @@ define @vp_round_nxv8f16( %va, @vp_round_nxv8f16( %va, @vp_round_nxv8f16( %va, @vp_round_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI19_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -763,11 +763,11 @@ define @vp_round_nxv8f16_unmasked( %va, i ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -787,12 +787,12 @@ define @vp_round_nxv16f16( %va, @vp_round_nxv16f16( %va, @vp_round_nxv16f16( %va, @vp_round_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; 
ZVFH-LABEL: vp_round_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -851,11 +851,11 @@ define @vp_round_nxv16f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -875,12 +875,12 @@ define @vp_round_nxv32f16( %va, @vp_round_nxv32f16( %va, @vp_round_nxv32f16( %va, @vp_round_nxv32f16( %va, @vp_round_nxv32f16( %va, @vp_round_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_round_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a0) ; ZVFH-NEXT: fsrmi a0, 4 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -995,11 +995,12 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 +; ZVFHMIN-NEXT: fsrmi a4, 4 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: vmv1r.v v17, v16 @@ -1014,11 +1015,10 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a2, 4 ; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t -; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: fsrm a4 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t @@ -1033,10 +1033,10 @@ define @vp_round_nxv32f16_unmasked( %va ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 4 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1064,9 +1064,9 @@ define @vp_round_nxv1f32( %va, @vp_round_nxv1f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: 
vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1106,9 +1106,9 @@ define @vp_round_nxv2f32( %va, @vp_round_nxv2f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1149,9 +1149,9 @@ define @vp_round_nxv4f32( %va, @vp_round_nxv4f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1193,9 +1193,9 @@ define @vp_round_nxv8f32( %va, @vp_round_nxv8f32_unmasked( %va, ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1237,9 +1237,9 @@ define @vp_round_nxv16f32( %va, @vp_round_nxv16f32_unmasked( % ; CHECK-NEXT: vfabs.v v16, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1276,13 +1276,13 @@ declare @llvm.vp.round.nxv1f64(, @vp_round_nxv1f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a0) +; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 4 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -1297,12 +1297,12 @@ define @vp_round_nxv1f64( %va, @vp_round_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1320,12 +1320,12 @@ define @vp_round_nxv2f64( %va, @vp_round_nxv2f64( %va, @vp_round_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1364,12 +1364,12 @@ define 
@vp_round_nxv4f64( %va, @vp_round_nxv4f64( %va, @vp_round_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1408,12 +1408,12 @@ define @vp_round_nxv7f64( %va, @vp_round_nxv7f64( %va, @vp_round_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1452,12 +1452,12 @@ define @vp_round_nxv8f64( %va, @vp_round_nxv8f64( %va, @vp_round_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_round_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a0) ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1498,59 +1498,66 @@ define @vp_round_nxv16f64( %va, @vp_round_nxv16f64_unmasked( ; CHECK-NEXT: sltu a2, a0, a3 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a3 +; CHECK-NEXT: fsrmi a3, 4 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16 ; CHECK-NEXT: vmflt.vf v0, v24, fa5 -; CHECK-NEXT: fsrmi a2, 4 ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a3 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t @@ -1585,8 +1592,8 @@ define @vp_round_nxv16f64_unmasked( ; CHECK-NEXT: .LBB45_2: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v8 -; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: fsrmi a0, 4 +; CHECK-NEXT: vmflt.vf v0, v24, fa5 ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t diff --git a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll index df5844277c997..d4043fd8b6816 100644 --- a/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/roundeven-vp.ll @@ -22,12 +22,12 @@ define @vp_roundeven_nxv1bf16( %va, < ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v11, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: 
vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -49,11 +49,11 @@ define @vp_roundeven_nxv1bf16_unmasked( @vp_roundeven_nxv2bf16( %va, < ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v11, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -103,11 +103,11 @@ define @vp_roundeven_nxv2bf16_unmasked( @vp_roundeven_nxv4bf16( %va, < ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -157,11 +157,11 @@ define @vp_roundeven_nxv4bf16_unmasked( @vp_roundeven_nxv8bf16( %va, < ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v12, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -211,11 +211,11 @@ define @vp_roundeven_nxv8bf16_unmasked( @vp_roundeven_nxv16bf16( %va ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t @@ -265,11 +265,11 @@ define @vp_roundeven_nxv16bf16_unmasked( @vp_roundeven_nxv32bf16( %va ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: lui a3, 307200 +; CHECK-NEXT: fsrmi a4, 0 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 @@ -315,11 +316,10 @@ define @vp_roundeven_nxv32bf16( %va ; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v18, v8, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 0 ; CHECK-NEXT: vmv1r.v v0, v18 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a4 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t @@ -336,11 +336,11 @@ define @vp_roundeven_nxv32bf16( %va ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t ; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli 
zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t @@ -375,11 +375,12 @@ define @vp_roundeven_nxv32bf16_unmasked( @vp_roundeven_nxv32bf16_unmasked( @vp_roundeven_nxv32bf16_unmasked( @llvm.vp.roundeven.nxv1f16(, @vp_roundeven_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a0) +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -461,12 +461,12 @@ define @vp_roundeven_nxv1f16( %va, @vp_roundeven_nxv1f16( %va, @vp_roundeven_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -503,11 +503,11 @@ define @vp_roundeven_nxv1f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -525,13 +525,13 @@ declare @llvm.vp.roundeven.nxv2f16(, @vp_roundeven_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI14_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a0) +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -547,12 +547,12 @@ define @vp_roundeven_nxv2f16( %va, @vp_roundeven_nxv2f16( %va, @vp_roundeven_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI15_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -589,11 +589,11 @@ define 
@vp_roundeven_nxv2f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -611,13 +611,13 @@ declare @llvm.vp.roundeven.nxv4f16(, @vp_roundeven_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a0) +; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 0 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -633,12 +633,12 @@ define @vp_roundeven_nxv4f16( %va, @vp_roundeven_nxv4f16( %va, @vp_roundeven_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -675,11 +675,11 @@ define @vp_roundeven_nxv4f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -699,12 +699,12 @@ define @vp_roundeven_nxv8f16( %va, @vp_roundeven_nxv8f16( %va, @vp_roundeven_nxv8f16( %va, @vp_roundeven_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI19_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -763,11 +763,11 @@ define @vp_roundeven_nxv8f16_unmasked( %v ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -787,12 +787,12 @@ define 
@vp_roundeven_nxv16f16( %va, @vp_roundeven_nxv16f16( %va, @vp_roundeven_nxv16f16( %va, @vp_roundeven_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -851,11 +851,11 @@ define @vp_roundeven_nxv16f16_unmasked( ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v16 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -875,12 +875,12 @@ define @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16( %va, @vp_roundeven_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundeven_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a0) ; ZVFH-NEXT: fsrmi a0, 0 +; ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -995,11 +995,12 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; ZVFHMIN-NEXT: vmset.m v16 ; ZVFHMIN-NEXT: lui a3, 307200 +; ZVFHMIN-NEXT: fsrmi a4, 0 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 ; ZVFHMIN-NEXT: sub a3, a0, a1 -; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma ; ZVFHMIN-NEXT: vslidedown.vx v16, v16, a2 ; ZVFHMIN-NEXT: sltu a2, a0, a3 ; ZVFHMIN-NEXT: vmv1r.v v17, v16 @@ -1014,11 +1015,10 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v17, v8, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a2, 0 ; ZVFHMIN-NEXT: vmv1r.v v0, v17 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t -; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: fsrm a4 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t @@ -1033,10 +1033,10 @@ define @vp_roundeven_nxv32f16_unmasked( ; ZVFHMIN-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v24 +; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16 ; ZVFHMIN-NEXT: vmflt.vf v0, v24, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 0 ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v24, v24, v0.t @@ -1064,9 +1064,9 @@ define @vp_roundeven_nxv1f32( %va, 
@vp_roundeven_nxv1f32_unmasked( ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1106,9 +1106,9 @@ define @vp_roundeven_nxv2f32( %va, @vp_roundeven_nxv2f32_unmasked( ; CHECK-NEXT: vfabs.v v9, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1149,9 +1149,9 @@ define @vp_roundeven_nxv4f32( %va, @vp_roundeven_nxv4f32_unmasked( ; CHECK-NEXT: vfabs.v v10, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1193,9 +1193,9 @@ define @vp_roundeven_nxv8f32( %va, @vp_roundeven_nxv8f32_unmasked( ; CHECK-NEXT: vfabs.v v12, v8 ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1237,9 +1237,9 @@ define @vp_roundeven_nxv16f32( %va, < ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1259,8 +1259,8 @@ define @vp_roundeven_nxv16f32_unmasked( @llvm.vp.roundeven.nxv1f64(, define @vp_roundeven_nxv1f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a0) +; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 0 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -1297,12 +1297,12 @@ define @vp_roundeven_nxv1f64( %va, @vp_roundeven_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1320,12 +1320,12 @@ define @vp_roundeven_nxv2f64( %va, @vp_roundeven_nxv2f64( %va, @vp_roundeven_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: 
vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1364,12 +1364,12 @@ define @vp_roundeven_nxv4f64( %va, @vp_roundeven_nxv4f64( %va, @vp_roundeven_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1408,12 +1408,12 @@ define @vp_roundeven_nxv7f64( %va, @vp_roundeven_nxv7f64( %va, @vp_roundeven_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1452,12 +1452,12 @@ define @vp_roundeven_nxv8f64( %va, @vp_roundeven_nxv8f64( %va, @vp_roundeven_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundeven_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a0) ; CHECK-NEXT: fsrmi a0, 0 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1498,59 +1498,66 @@ define @vp_roundeven_nxv16f64( %va, ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v25, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: fsrmi a3, 0 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, 
zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 0 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: fsrm a3 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 0 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -1570,12 +1577,12 @@ define @vp_roundeven_nxv16f64_unmasked( @vp_roundeven_nxv16f64_unmasked( @vp_roundtozero_nxv1bf16( %va, ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfabs.v v11, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, mu ; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -49,11 +49,11 @@ define @vp_roundtozero_nxv1bf16_unmasked( @vp_roundtozero_nxv2bf16( %va, ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfabs.v v11, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, mu ; CHECK-NEXT: vmflt.vf v8, v11, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv.v.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v11, v10, v0.t @@ -103,11 +103,11 @@ define 
@vp_roundtozero_nxv2bf16_unmasked( @vp_roundtozero_nxv4bf16( %va, ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfabs.v v12, v10, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, mu ; CHECK-NEXT: vmflt.vf v8, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v10, v0.t @@ -157,11 +157,11 @@ define @vp_roundtozero_nxv4bf16_unmasked( @vp_roundtozero_nxv8bf16( %va, ; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfabs.v v16, v12, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, mu ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v12, v0.t @@ -211,11 +211,11 @@ define @vp_roundtozero_nxv8bf16_unmasked( @vp_roundtozero_nxv16bf16( % ; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: vmv1r.v v8, v0 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v24, v16, v0.t -; CHECK-NEXT: fmv.w.x fa5, a0 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t @@ -265,11 +265,11 @@ define @vp_roundtozero_nxv16bf16_unmasked( @vp_roundtozero_nxv32bf16( % ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: lui a3, 307200 +; CHECK-NEXT: fsrmi a4, 1 ; CHECK-NEXT: slli a1, a2, 1 ; CHECK-NEXT: srli a2, a2, 2 ; CHECK-NEXT: fmv.w.x fa5, a3 @@ -315,11 +316,10 @@ define @vp_roundtozero_nxv32bf16( % ; CHECK-NEXT: vfabs.v v8, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v18, v8, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 1 ; CHECK-NEXT: vmv1r.v v0, v18 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v8, v24, v0.t -; CHECK-NEXT: fsrm a2 +; CHECK-NEXT: fsrm a4 ; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v24, v8, v24, v0.t @@ -336,11 +336,11 @@ define @vp_roundtozero_nxv32bf16( % ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v16, v0.t ; CHECK-NEXT: vmv1r.v v8, v7 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v24, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v8, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v24, v0.t @@ -375,11 +375,12 @@ define @vp_roundtozero_nxv32bf16_unmasked( @vp_roundtozero_nxv32bf16_unmasked( @vp_roundtozero_nxv32bf16_unmasked( @llvm.vp.roundtozero.nxv1f16(, @vp_roundtozero_nxv1f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv1f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI12_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; 
ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI12_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI12_0)(a0) +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -461,12 +461,12 @@ define @vp_roundtozero_nxv1f16( %va, @vp_roundtozero_nxv1f16( %va, @vp_roundtozero_nxv1f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv1f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI13_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI13_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI13_0)(a0) ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -503,11 +503,11 @@ define @vp_roundtozero_nxv1f16_unmasked( ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, mf2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -525,13 +525,13 @@ declare @llvm.vp.roundtozero.nxv2f16(, @vp_roundtozero_nxv2f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv2f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI14_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI14_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI14_0)(a0) +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -547,12 +547,12 @@ define @vp_roundtozero_nxv2f16( %va, @vp_roundtozero_nxv2f16( %va, @vp_roundtozero_nxv2f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv2f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI15_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI15_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI15_0)(a0) ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -589,11 +589,11 @@ define @vp_roundtozero_nxv2f16_unmasked( ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, mf2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v9, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v9 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v9, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -611,13 +611,13 @@ declare @llvm.vp.roundtozero.nxv4f16(, @vp_roundtozero_nxv4f16( %va, %m, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv4f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, 
%hi(.LCPI16_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8, v0.t +; ZVFH-NEXT: lui a0, %hi(.LCPI16_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI16_0)(a0) +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, mu ; ZVFH-NEXT: vmflt.vf v0, v9, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 @@ -633,12 +633,12 @@ define @vp_roundtozero_nxv4f16( %va, @vp_roundtozero_nxv4f16( %va, @vp_roundtozero_nxv4f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv4f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI17_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFH-NEXT: vfabs.v v9, v8 -; ZVFH-NEXT: vmflt.vf v0, v9, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI17_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI17_0)(a0) ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v9, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v9, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -675,11 +675,11 @@ define @vp_roundtozero_nxv4f16_unmasked( ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v10 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v10, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -699,12 +699,12 @@ define @vp_roundtozero_nxv8f16( %va, @vp_roundtozero_nxv8f16( %va, @vp_roundtozero_nxv8f16( %va, @vp_roundtozero_nxv8f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv8f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI19_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFH-NEXT: vfabs.v v10, v8 -; ZVFH-NEXT: vmflt.vf v0, v10, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI19_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI19_0)(a0) ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v10, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v10, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -763,11 +763,11 @@ define @vp_roundtozero_nxv8f16_unmasked( ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m2, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v12, v8 ; ZVFHMIN-NEXT: lui a0, 307200 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; ZVFHMIN-NEXT: vfabs.v v8, v12 -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vmflt.vf v0, v8, fa5 -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v12, v0.t ; ZVFHMIN-NEXT: fsrm a0 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t @@ -787,12 +787,12 @@ define @vp_roundtozero_nxv16f16( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vmv1r.v v12, v0 +; ZVFH-NEXT: vfabs.v v16, v8, v0.t ; ZVFH-NEXT: lui a0, %hi(.LCPI20_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI20_0)(a0) -; ZVFH-NEXT: vfabs.v v16, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, mu ; ZVFH-NEXT: vmflt.vf v12, v16, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vmv1r.v v0, v12 ; ZVFH-NEXT: vsetvli zero, zero, e16, m4, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -809,12 +809,12 @@ define @vp_roundtozero_nxv16f16( %va, < ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v8, v0.t ; ZVFHMIN-NEXT: lui a0, 307200 ; 
ZVFHMIN-NEXT: vmv1r.v v8, v0 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v24, v16, v0.t -; ZVFHMIN-NEXT: fmv.w.x fa5, a0 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v24, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v24, v16, v0.t @@ -833,12 +833,12 @@ define @vp_roundtozero_nxv16f16( %va, < define @vp_roundtozero_nxv16f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv16f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI21_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFH-NEXT: vfabs.v v12, v8 -; ZVFH-NEXT: vmflt.vf v0, v12, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI21_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI21_0)(a0) ; ZVFH-NEXT: fsrmi a0, 1 +; ZVFH-NEXT: vmflt.vf v0, v12, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v12, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -851,11 +851,11 @@ define @vp_roundtozero_nxv16f16_unmasked( @vp_roundtozero_nxv32f16( %va, < ; ZVFH: # %bb.0: ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vmv1r.v v16, v0 +; ZVFH-NEXT: vfabs.v v24, v8, v0.t ; ZVFH-NEXT: lui a0, %hi(.LCPI22_0) ; ZVFH-NEXT: flh fa5, %lo(.LCPI22_0)(a0) -; ZVFH-NEXT: vfabs.v v24, v8, v0.t +; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, mu ; ZVFH-NEXT: vmflt.vf v16, v24, fa5, v0.t -; ZVFH-NEXT: fsrmi a0, 1 ; ZVFH-NEXT: vmv1r.v v0, v16 ; ZVFH-NEXT: vsetvli zero, zero, e16, m8, ta, ma ; ZVFH-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -902,6 +902,7 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFHMIN-NEXT: vmv1r.v v7, v0 ; ZVFHMIN-NEXT: csrr a2, vlenb ; ZVFHMIN-NEXT: lui a3, 307200 +; ZVFHMIN-NEXT: fsrmi a4, 1 ; ZVFHMIN-NEXT: slli a1, a2, 1 ; ZVFHMIN-NEXT: srli a2, a2, 2 ; ZVFHMIN-NEXT: fmv.w.x fa5, a3 @@ -920,11 +921,10 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFHMIN-NEXT: vfabs.v v8, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v18, v8, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a2, 1 ; ZVFHMIN-NEXT: vmv1r.v v0, v18 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v8, v24, v0.t -; ZVFHMIN-NEXT: fsrm a2 +; ZVFHMIN-NEXT: fsrm a4 ; ZVFHMIN-NEXT: vfcvt.f.x.v v8, v8, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vfsgnj.vv v24, v8, v24, v0.t @@ -941,11 +941,11 @@ define @vp_roundtozero_nxv32f16( %va, < ; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v24, v16, v0.t ; ZVFHMIN-NEXT: vmv1r.v v8, v7 +; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfabs.v v16, v24, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; ZVFHMIN-NEXT: vmflt.vf v8, v16, fa5, v0.t -; ZVFHMIN-NEXT: fsrmi a0, 1 ; ZVFHMIN-NEXT: vmv1r.v v0, v8 ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; ZVFHMIN-NEXT: vfcvt.x.f.v v16, v24, v0.t @@ -970,12 +970,12 @@ define @vp_roundtozero_nxv32f16( %va, < define @vp_roundtozero_nxv32f16_unmasked( %va, i32 zeroext %evl) { ; ZVFH-LABEL: vp_roundtozero_nxv32f16_unmasked: ; ZVFH: # %bb.0: -; ZVFH-NEXT: lui a1, %hi(.LCPI23_0) -; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a1) ; ZVFH-NEXT: vsetvli zero, a0, e16, m8, ta, ma ; ZVFH-NEXT: vfabs.v v16, v8 -; ZVFH-NEXT: vmflt.vf v0, v16, fa5 +; ZVFH-NEXT: lui a0, %hi(.LCPI23_0) +; ZVFH-NEXT: flh fa5, %lo(.LCPI23_0)(a0) ; ZVFH-NEXT: fsrmi a0, 1 +; 
ZVFH-NEXT: vmflt.vf v0, v16, fa5 ; ZVFH-NEXT: vfcvt.x.f.v v16, v8, v0.t ; ZVFH-NEXT: fsrm a0 ; ZVFH-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -995,11 +995,12 @@ define @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv32f16_unmasked( @vp_roundtozero_nxv1f32( %va, @vp_roundtozero_nxv1f32_unmasked( @vp_roundtozero_nxv2f32( %va, @vp_roundtozero_nxv2f32_unmasked( @vp_roundtozero_nxv4f32( %va, @vp_roundtozero_nxv4f32_unmasked( @vp_roundtozero_nxv8f32( %va, @vp_roundtozero_nxv8f32_unmasked( @vp_roundtozero_nxv16f32( %va, ; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, 307200 ; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1259,8 +1259,8 @@ define @vp_roundtozero_nxv16f32_unmasked( @llvm.vp.roundtozero.nxv1f64( define @vp_roundtozero_nxv1f64( %va, %m, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv1f64: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI34_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8, v0.t +; CHECK-NEXT: lui a0, %hi(.LCPI34_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI34_0)(a0) +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, mu ; CHECK-NEXT: vmflt.vf v0, v9, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 @@ -1297,12 +1297,12 @@ define @vp_roundtozero_nxv1f64( %va, define @vp_roundtozero_nxv1f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv1f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI35_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma ; CHECK-NEXT: vfabs.v v9, v8 -; CHECK-NEXT: vmflt.vf v0, v9, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI35_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI35_0)(a0) ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v9, fa5 ; CHECK-NEXT: vfcvt.x.f.v v9, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v9, v9, v0.t @@ -1320,12 +1320,12 @@ define @vp_roundtozero_nxv2f64( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vmv1r.v v10, v0 +; CHECK-NEXT: vfabs.v v12, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI36_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI36_0)(a0) -; CHECK-NEXT: vfabs.v v12, v8, v0.t +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, mu ; CHECK-NEXT: vmflt.vf v10, v12, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t @@ -1341,12 +1341,12 @@ define @vp_roundtozero_nxv2f64( %va, define @vp_roundtozero_nxv2f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv2f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI37_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m2, ta, ma ; CHECK-NEXT: vfabs.v v10, v8 -; CHECK-NEXT: vmflt.vf v0, v10, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI37_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI37_0)(a0) ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v10, fa5 ; CHECK-NEXT: vfcvt.x.f.v v10, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v10, v10, v0.t @@ -1364,12 +1364,12 @@ define @vp_roundtozero_nxv4f64( %va, ; CHECK: # %bb.0: ; 
CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vmv1r.v v12, v0 +; CHECK-NEXT: vfabs.v v16, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI38_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI38_0)(a0) -; CHECK-NEXT: vfabs.v v16, v8, v0.t +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, mu ; CHECK-NEXT: vmflt.vf v12, v16, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v12 ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t @@ -1385,12 +1385,12 @@ define @vp_roundtozero_nxv4f64( %va, define @vp_roundtozero_nxv4f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv4f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI39_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m4, ta, ma ; CHECK-NEXT: vfabs.v v12, v8 -; CHECK-NEXT: vmflt.vf v0, v12, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI39_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI39_0)(a0) ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v12, fa5 ; CHECK-NEXT: vfcvt.x.f.v v12, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v12, v12, v0.t @@ -1408,12 +1408,12 @@ define @vp_roundtozero_nxv7f64( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI40_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI40_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1429,12 +1429,12 @@ define @vp_roundtozero_nxv7f64( %va, define @vp_roundtozero_nxv7f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv7f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI41_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI41_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI41_0)(a0) ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t @@ -1452,12 +1452,12 @@ define @vp_roundtozero_nxv8f64( %va, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vmv1r.v v16, v0 +; CHECK-NEXT: vfabs.v v24, v8, v0.t ; CHECK-NEXT: lui a0, %hi(.LCPI42_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI42_0)(a0) -; CHECK-NEXT: vfabs.v v24, v8, v0.t +; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vmflt.vf v16, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a0, 1 ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v24, v8, v0.t @@ -1473,12 +1473,12 @@ define @vp_roundtozero_nxv8f64( %va, define @vp_roundtozero_nxv8f64_unmasked( %va, i32 zeroext %evl) { ; CHECK-LABEL: vp_roundtozero_nxv8f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI43_0) -; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8 -; CHECK-NEXT: vmflt.vf v0, v16, fa5 +; CHECK-NEXT: lui a0, %hi(.LCPI43_0) +; CHECK-NEXT: fld fa5, %lo(.LCPI43_0)(a0) ; CHECK-NEXT: fsrmi a0, 1 +; CHECK-NEXT: vmflt.vf v0, v16, fa5 ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, 
v0.t @@ -1498,59 +1498,66 @@ define @vp_roundtozero_nxv16f64( %v ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: slli a1, a1, 4 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v7, v0 +; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: lui a2, %hi(.LCPI44_0) ; CHECK-NEXT: srli a3, a1, 3 ; CHECK-NEXT: fld fa5, %lo(.LCPI44_0)(a2) ; CHECK-NEXT: sub a2, a0, a1 -; CHECK-NEXT: vslidedown.vx v6, v0, a3 +; CHECK-NEXT: vslidedown.vx v25, v0, a3 ; CHECK-NEXT: sltu a3, a0, a2 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a2, a3, a2 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: fsrmi a3, 1 +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma -; CHECK-NEXT: vfabs.v v24, v16, v0.t +; CHECK-NEXT: vfabs.v v8, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v6, v24, fa5, v0.t -; CHECK-NEXT: fsrmi a2, 1 -; CHECK-NEXT: vmv1r.v v0, v6 +; CHECK-NEXT: vmflt.vf v25, v8, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v25 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma -; CHECK-NEXT: vfcvt.x.f.v v24, v16, v0.t -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a3) # Unknown-size Folded Spill -; CHECK-NEXT: fsrm a2 -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a2) # Unknown-size Folded Reload -; CHECK-NEXT: vfcvt.f.x.v v24, v24, v0.t +; CHECK-NEXT: vfcvt.x.f.v v8, v16, v0.t +; CHECK-NEXT: fsrm a3 +; CHECK-NEXT: vfcvt.f.x.v v8, v8, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vfsgnj.vv v16, v24, v16, v0.t +; CHECK-NEXT: vfsgnj.vv v16, v8, v16, v0.t +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 ; CHECK-NEXT: vs8r.v v16, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: bltu a0, a1, .LBB44_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a0, a1 ; CHECK-NEXT: .LBB44_2: -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vmv1r.v v0, v24 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vfabs.v v16, v8, v0.t -; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu -; CHECK-NEXT: vmflt.vf v7, v16, fa5, v0.t ; CHECK-NEXT: fsrmi a0, 1 -; CHECK-NEXT: vmv1r.v v0, v7 +; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu +; CHECK-NEXT: vmflt.vf v24, v16, fa5, v0.t +; CHECK-NEXT: vmv1r.v v0, v24 ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; CHECK-NEXT: vfcvt.x.f.v v16, v8, v0.t ; CHECK-NEXT: fsrm a0 ; CHECK-NEXT: vfcvt.f.x.v v16, v16, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m8, ta, mu ; CHECK-NEXT: vfsgnj.vv v8, v16, v8, v0.t -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ 
-1570,12 +1577,12 @@ define @vp_roundtozero_nxv16f64_unmasked( @vp_roundtozero_nxv16f64_unmasked( @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7, %w, %x, %y, %z) { ; CHECK-LABEL: bar: ; CHECK: # %bb.0: -; CHECK-NEXT: ld a0, 0(sp) -; CHECK-NEXT: ld a1, 8(sp) +; CHECK-NEXT: ld a0, 8(sp) ; CHECK-NEXT: vl8re32.v v24, (a0) -; CHECK-NEXT: vl8re32.v v0, (a1) +; CHECK-NEXT: ld a0, 0(sp) +; CHECK-NEXT: vl8re32.v v0, (a0) ; CHECK-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; CHECK-NEXT: vadd.vv v8, v8, v24 -; CHECK-NEXT: vadd.vv v16, v16, v0 +; CHECK-NEXT: vadd.vv v8, v8, v0 +; CHECK-NEXT: vadd.vv v16, v16, v24 ; CHECK-NEXT: vadd.vv v8, v8, v16 ; CHECK-NEXT: ret %s0 = add %w, %y diff --git a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll index 403cc0eb9dce1..f6417fa29ea0f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll +++ b/llvm/test/CodeGen/RISCV/rvv/rvv-peephole-vmerge-vops.ll @@ -969,8 +969,8 @@ define @vfredusum( %passthru, @llvm.riscv.vfredusum.nxv2f32.nxv2f32( %passthru, diff --git a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll index e6272701a6033..9a4121b41c3f3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/setcc-fp-vp.ll @@ -1358,13 +1358,10 @@ define @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @fcmp_oeq_vv_nxv64bf16( %va, @llvm.vp.fcmp.nxv64f16(, @fcmp_oeq_vv_nxv64f16( %va, %vb, %m, i32 zeroext %evl) { ; ZVFH-LABEL: fcmp_oeq_vv_nxv64f16: ; ZVFH: # %bb.0: -; ZVFH-NEXT: addi sp, sp, -16 -; ZVFH-NEXT: .cfi_def_cfa_offset 16 -; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: slli a1, a1, 4 -; ZVFH-NEXT: sub sp, sp, a1 -; ZVFH-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; ZVFH-NEXT: vsetvli a1, zero, e8, m1, ta, ma -; ZVFH-NEXT: vmv1r.v v24, v0 +; ZVFH-NEXT: vmv1r.v v7, v0 ; ZVFH-NEXT: csrr a1, vlenb -; ZVFH-NEXT: slli a1, a1, 3 -; ZVFH-NEXT: add a1, sp, a1 -; ZVFH-NEXT: addi a1, a1, 16 -; ZVFH-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; ZVFH-NEXT: csrr a3, vlenb -; ZVFH-NEXT: srli a1, a3, 1 -; ZVFH-NEXT: slli a4, a3, 3 -; ZVFH-NEXT: slli a3, a3, 2 +; ZVFH-NEXT: slli a4, a1, 3 +; ZVFH-NEXT: slli a3, a1, 2 ; ZVFH-NEXT: add a4, a0, a4 ; ZVFH-NEXT: sub a5, a2, a3 -; ZVFH-NEXT: vl8re16.v v8, (a4) +; ZVFH-NEXT: vl8re16.v v24, (a4) ; ZVFH-NEXT: sltu a4, a2, a5 ; ZVFH-NEXT: addi a4, a4, -1 -; ZVFH-NEXT: vl8re16.v v0, (a0) -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; ZVFH-NEXT: vslidedown.vx v0, v24, a1 ; ZVFH-NEXT: and a4, a4, a5 +; ZVFH-NEXT: srli a1, a1, 1 +; ZVFH-NEXT: vslidedown.vx v0, v0, a1 ; ZVFH-NEXT: vsetvli zero, a4, e16, m8, ta, ma -; ZVFH-NEXT: vmfeq.vv v7, v16, v8, v0.t +; ZVFH-NEXT: vmfeq.vv v6, v16, v24, v0.t +; ZVFH-NEXT: vl8re16.v v24, (a0) ; ZVFH-NEXT: bltu a2, a3, .LBB171_2 ; ZVFH-NEXT: # %bb.1: ; ZVFH-NEXT: mv a2, a3 ; ZVFH-NEXT: .LBB171_2: -; ZVFH-NEXT: vmv1r.v v0, v24 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 3 -; ZVFH-NEXT: add a0, sp, a0 -; ZVFH-NEXT: addi a0, a0, 16 -; ZVFH-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; ZVFH-NEXT: addi a0, sp, 16 -; ZVFH-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; ZVFH-NEXT: vmv1r.v v0, v7 ; ZVFH-NEXT: vsetvli zero, a2, e16, m8, ta, ma ; ZVFH-NEXT: vmfeq.vv v16, v8, v24, v0.t ; ZVFH-NEXT: add a0, a1, a1 ; ZVFH-NEXT: vsetvli zero, a0, e8, m1, ta, ma -; ZVFH-NEXT: vslideup.vx v16, 
v7, a1 +; ZVFH-NEXT: vslideup.vx v16, v6, a1 ; ZVFH-NEXT: vmv.v.v v0, v16 -; ZVFH-NEXT: csrr a0, vlenb -; ZVFH-NEXT: slli a0, a0, 4 -; ZVFH-NEXT: add sp, sp, a0 -; ZVFH-NEXT: .cfi_def_cfa sp, 16 -; ZVFH-NEXT: addi sp, sp, 16 -; ZVFH-NEXT: .cfi_def_cfa_offset 0 ; ZVFH-NEXT: ret ; ; ZVFHMIN-LABEL: fcmp_oeq_vv_nxv64f16: @@ -3558,13 +3522,10 @@ define @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @fcmp_oeq_vv_nxv64f16( %va, @llvm.vp.icmp.nxv128i8(, @icmp_eq_vv_nxv128i8( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vv_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a2, a0, a1 ; CHECK-NEXT: sub a4, a3, a1 -; CHECK-NEXT: vl8r.v v8, (a2) +; CHECK-NEXT: vl8r.v v24, (a2) ; CHECK-NEXT: sltu a2, a3, a4 -; CHECK-NEXT: vl8r.v v24, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a4 ; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma -; CHECK-NEXT: vmseq.vv v6, v16, v8, v0.t +; CHECK-NEXT: vmseq.vv v6, v16, v24, v0.t +; CHECK-NEXT: vl8r.v v24, (a0) ; CHECK-NEXT: bltu a3, a1, .LBB96_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a3, a1 ; CHECK-NEXT: .LBB96_2: ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a3, e8, m8, ta, ma ; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vmv1r.v v8, v6 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call @llvm.vp.icmp.nxv128i8( %va, %vb, metadata !"eq", %m, i32 %evl) ret %v @@ -2223,59 +2197,33 @@ declare @llvm.vp.icmp.nxv32i32(, @icmp_eq_vv_nxv32i32( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: icmp_eq_vv_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a3, vlenb -; CHECK-NEXT: srli a1, a3, 2 -; CHECK-NEXT: slli a4, a3, 3 -; CHECK-NEXT: slli a3, a3, 1 +; CHECK-NEXT: slli a4, a1, 3 +; CHECK-NEXT: slli a3, a1, 1 ; CHECK-NEXT: add a4, a0, a4 ; CHECK-NEXT: sub a5, a2, a3 -; 
CHECK-NEXT: vl8re32.v v8, (a4) +; CHECK-NEXT: vl8re32.v v24, (a4) ; CHECK-NEXT: sltu a4, a2, a5 ; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: vl8re32.v v0, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v0, v24, a1 ; CHECK-NEXT: and a4, a4, a5 +; CHECK-NEXT: srli a1, a1, 2 +; CHECK-NEXT: vslidedown.vx v0, v0, a1 ; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vmseq.vv v7, v16, v8, v0.t +; CHECK-NEXT: vmseq.vv v6, v16, v24, v0.t +; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: bltu a2, a3, .LBB189_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB189_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vmseq.vv v16, v8, v24, v0.t ; CHECK-NEXT: add a0, a1, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma -; CHECK-NEXT: vslideup.vx v16, v7, a1 +; CHECK-NEXT: vslideup.vx v16, v6, a1 ; CHECK-NEXT: vmv1r.v v0, v16 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call @llvm.vp.icmp.nxv32i32( %va, %vb, metadata !"eq", %m, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/shrinkwrap.ll b/llvm/test/CodeGen/RISCV/rvv/shrinkwrap.ll index d12f2c889650f..eb6635117d0a1 100644 --- a/llvm/test/CodeGen/RISCV/rvv/shrinkwrap.ll +++ b/llvm/test/CodeGen/RISCV/rvv/shrinkwrap.ll @@ -17,8 +17,8 @@ define void @vecaddr_straightline(i32 zeroext %a, ptr %p) { ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vle32.v v8, (s0) ; RV32-NEXT: vadd.vi v8, v8, 1 -; RV32-NEXT: li a1, 57 ; RV32-NEXT: vse32.v v8, (s0) +; RV32-NEXT: li a1, 57 ; RV32-NEXT: beq a0, a1, .LBB0_2 ; RV32-NEXT: # %bb.1: # %do_call ; RV32-NEXT: call foo @@ -47,8 +47,8 @@ define void @vecaddr_straightline(i32 zeroext %a, ptr %p) { ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vle32.v v8, (s0) ; RV64-NEXT: vadd.vi v8, v8, 1 -; RV64-NEXT: li a1, 57 ; RV64-NEXT: vse32.v v8, (s0) +; RV64-NEXT: li a1, 57 ; RV64-NEXT: beq a0, a1, .LBB0_2 ; RV64-NEXT: # %bb.1: # %do_call ; RV64-NEXT: call foo @@ -97,8 +97,8 @@ define void @vecaddr_loop(i32 zeroext %a, ptr %p) { ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32-NEXT: vle32.v v8, (s0) ; RV32-NEXT: vadd.vi v8, v8, 1 -; RV32-NEXT: li a1, 57 ; RV32-NEXT: vse32.v v8, (s0) +; RV32-NEXT: li a1, 57 ; RV32-NEXT: beq a0, a1, .LBB1_2 ; RV32-NEXT: .LBB1_1: # %do_call ; RV32-NEXT: # =>This Inner Loop Header: Depth=1 @@ -129,8 +129,8 @@ define void @vecaddr_loop(i32 zeroext %a, ptr %p) { ; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64-NEXT: vle32.v v8, (s0) ; RV64-NEXT: vadd.vi v8, v8, 1 -; RV64-NEXT: li a1, 57 ; RV64-NEXT: vse32.v v8, (s0) +; RV64-NEXT: li a1, 57 ; RV64-NEXT: beq a0, a1, .LBB1_2 ; RV64-NEXT: .LBB1_1: # %do_call ; RV64-NEXT: # =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll index 1948675ae9cf0..c0792566160ba 100644 --- a/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll +++ b/llvm/test/CodeGen/RISCV/rvv/sink-splat-operands.ll @@ -1457,19 +1457,19 @@ 
for.cond.cleanup: ; preds = %vector.body define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fmul_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a3, .LBB26_2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: bgeu a1, a3, .LBB26_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB26_5 ; CHECK-NEXT: .LBB26_2: # %vector.ph -; CHECK-NEXT: addi a2, a3, -1 -; CHECK-NEXT: andi a4, a2, 1024 -; CHECK-NEXT: xori a2, a4, 1024 +; CHECK-NEXT: addi a1, a3, -1 +; CHECK-NEXT: andi a4, a1, 1024 +; CHECK-NEXT: xori a1, a4, 1024 ; CHECK-NEXT: mv a5, a0 -; CHECK-NEXT: mv a6, a2 +; CHECK-NEXT: mv a6, a1 ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB26_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1477,12 +1477,12 @@ define void @sink_splat_fmul_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: sub a6, a6, a3 ; CHECK-NEXT: vfmul.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a5) -; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: bnez a6, .LBB26_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB26_7 ; CHECK-NEXT: .LBB26_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: add a0, a0, a2 @@ -1547,19 +1547,19 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fdiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a3, .LBB27_2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: bgeu a1, a3, .LBB27_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB27_5 ; CHECK-NEXT: .LBB27_2: # %vector.ph -; CHECK-NEXT: addi a2, a3, -1 -; CHECK-NEXT: andi a4, a2, 1024 -; CHECK-NEXT: xori a2, a4, 1024 +; CHECK-NEXT: addi a1, a3, -1 +; CHECK-NEXT: andi a4, a1, 1024 +; CHECK-NEXT: xori a1, a4, 1024 ; CHECK-NEXT: mv a5, a0 -; CHECK-NEXT: mv a6, a2 +; CHECK-NEXT: mv a6, a1 ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB27_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1567,12 +1567,12 @@ define void @sink_splat_fdiv_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: sub a6, a6, a3 ; CHECK-NEXT: vfdiv.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a5) -; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: bnez a6, .LBB27_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB27_7 ; CHECK-NEXT: .LBB27_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: add a0, a0, a2 @@ -1637,19 +1637,19 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frdiv_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a3, .LBB28_2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: bgeu a1, a3, .LBB28_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a1, 0 ; 
CHECK-NEXT: j .LBB28_5 ; CHECK-NEXT: .LBB28_2: # %vector.ph -; CHECK-NEXT: addi a2, a3, -1 -; CHECK-NEXT: andi a4, a2, 1024 -; CHECK-NEXT: xori a2, a4, 1024 +; CHECK-NEXT: addi a1, a3, -1 +; CHECK-NEXT: andi a4, a1, 1024 +; CHECK-NEXT: xori a1, a4, 1024 ; CHECK-NEXT: mv a5, a0 -; CHECK-NEXT: mv a6, a2 +; CHECK-NEXT: mv a6, a1 ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB28_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1657,12 +1657,12 @@ define void @sink_splat_frdiv_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: sub a6, a6, a3 ; CHECK-NEXT: vfrdiv.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a5) -; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: bnez a6, .LBB28_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB28_7 ; CHECK-NEXT: .LBB28_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: add a0, a0, a2 @@ -1727,19 +1727,19 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fadd_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a3, .LBB29_2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: bgeu a1, a3, .LBB29_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB29_5 ; CHECK-NEXT: .LBB29_2: # %vector.ph -; CHECK-NEXT: addi a2, a3, -1 -; CHECK-NEXT: andi a4, a2, 1024 -; CHECK-NEXT: xori a2, a4, 1024 +; CHECK-NEXT: addi a1, a3, -1 +; CHECK-NEXT: andi a4, a1, 1024 +; CHECK-NEXT: xori a1, a4, 1024 ; CHECK-NEXT: mv a5, a0 -; CHECK-NEXT: mv a6, a2 +; CHECK-NEXT: mv a6, a1 ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB29_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1747,12 +1747,12 @@ define void @sink_splat_fadd_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: sub a6, a6, a3 ; CHECK-NEXT: vfadd.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a5) -; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: bnez a6, .LBB29_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB29_7 ; CHECK-NEXT: .LBB29_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: add a0, a0, a2 @@ -1817,19 +1817,19 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_fsub_scalable(ptr nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_fsub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a3, .LBB30_2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: bgeu a1, a3, .LBB30_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB30_5 ; CHECK-NEXT: .LBB30_2: # %vector.ph -; CHECK-NEXT: addi a2, a3, -1 -; CHECK-NEXT: andi a4, a2, 1024 -; CHECK-NEXT: xori a2, a4, 1024 +; CHECK-NEXT: addi a1, a3, -1 +; CHECK-NEXT: andi a4, a1, 1024 +; CHECK-NEXT: xori a1, a4, 1024 ; CHECK-NEXT: mv a5, a0 -; CHECK-NEXT: mv a6, a2 +; CHECK-NEXT: mv a6, a1 ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB30_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1837,12 +1837,12 @@ define void 
@sink_splat_fsub_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: sub a6, a6, a3 ; CHECK-NEXT: vfsub.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a5) -; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: bnez a6, .LBB30_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB30_7 ; CHECK-NEXT: .LBB30_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: add a0, a0, a2 @@ -1907,19 +1907,19 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) { ; CHECK-LABEL: sink_splat_frsub_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 2 -; CHECK-NEXT: li a2, 1024 -; CHECK-NEXT: bgeu a2, a3, .LBB31_2 +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: srli a3, a2, 2 +; CHECK-NEXT: li a1, 1024 +; CHECK-NEXT: bgeu a1, a3, .LBB31_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a2, 0 +; CHECK-NEXT: li a1, 0 ; CHECK-NEXT: j .LBB31_5 ; CHECK-NEXT: .LBB31_2: # %vector.ph -; CHECK-NEXT: addi a2, a3, -1 -; CHECK-NEXT: andi a4, a2, 1024 -; CHECK-NEXT: xori a2, a4, 1024 +; CHECK-NEXT: addi a1, a3, -1 +; CHECK-NEXT: andi a4, a1, 1024 +; CHECK-NEXT: xori a1, a4, 1024 ; CHECK-NEXT: mv a5, a0 -; CHECK-NEXT: mv a6, a2 +; CHECK-NEXT: mv a6, a1 ; CHECK-NEXT: vsetvli a7, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB31_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 @@ -1927,12 +1927,12 @@ define void @sink_splat_frsub_scalable(ptr nocapture %a, float %x) { ; CHECK-NEXT: sub a6, a6, a3 ; CHECK-NEXT: vfrsub.vf v8, v8, fa0 ; CHECK-NEXT: vs1r.v v8, (a5) -; CHECK-NEXT: add a5, a5, a1 +; CHECK-NEXT: add a5, a5, a2 ; CHECK-NEXT: bnez a6, .LBB31_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a4, .LBB31_7 ; CHECK-NEXT: .LBB31_5: # %for.body.preheader -; CHECK-NEXT: slli a1, a2, 2 +; CHECK-NEXT: slli a1, a1, 2 ; CHECK-NEXT: lui a2, 1 ; CHECK-NEXT: add a1, a0, a1 ; CHECK-NEXT: add a0, a0, a2 @@ -2073,35 +2073,35 @@ for.cond.cleanup: ; preds = %vector.body define void @sink_splat_fma_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a4, a2, 2 -; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a4, .LBB34_2 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a4, a3, 2 +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: bgeu a2, a4, .LBB34_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB34_5 ; CHECK-NEXT: .LBB34_2: # %vector.ph -; CHECK-NEXT: addi a3, a4, -1 -; CHECK-NEXT: andi a5, a3, 1024 -; CHECK-NEXT: xori a3, a5, 1024 +; CHECK-NEXT: addi a2, a4, -1 +; CHECK-NEXT: andi a5, a2, 1024 +; CHECK-NEXT: xori a2, a5, 1024 ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a1 -; CHECK-NEXT: mv t0, a3 +; CHECK-NEXT: mv t0, a2 ; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB34_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) ; CHECK-NEXT: vl1re32.v v9, (a7) ; CHECK-NEXT: sub t0, t0, a4 -; CHECK-NEXT: add a7, a7, a2 +; CHECK-NEXT: add a7, a7, a3 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 ; CHECK-NEXT: vs1r.v v9, (a6) -; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a6, a6, a3 ; CHECK-NEXT: bnez t0, .LBB34_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a5, .LBB34_7 ; CHECK-NEXT: .LBB34_5: # %for.body.preheader -; CHECK-NEXT: slli a2, a3, 2 +; 
CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: lui a3, 1 ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: add a2, a1, a2 @@ -2173,35 +2173,35 @@ for.body: ; preds = %for.body.preheader, define void @sink_splat_fma_commute_scalable(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, float %x) { ; CHECK-LABEL: sink_splat_fma_commute_scalable: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: csrr a2, vlenb -; CHECK-NEXT: srli a4, a2, 2 -; CHECK-NEXT: li a3, 1024 -; CHECK-NEXT: bgeu a3, a4, .LBB35_2 +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: srli a4, a3, 2 +; CHECK-NEXT: li a2, 1024 +; CHECK-NEXT: bgeu a2, a4, .LBB35_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: li a3, 0 +; CHECK-NEXT: li a2, 0 ; CHECK-NEXT: j .LBB35_5 ; CHECK-NEXT: .LBB35_2: # %vector.ph -; CHECK-NEXT: addi a3, a4, -1 -; CHECK-NEXT: andi a5, a3, 1024 -; CHECK-NEXT: xori a3, a5, 1024 +; CHECK-NEXT: addi a2, a4, -1 +; CHECK-NEXT: andi a5, a2, 1024 +; CHECK-NEXT: xori a2, a5, 1024 ; CHECK-NEXT: mv a6, a0 ; CHECK-NEXT: mv a7, a1 -; CHECK-NEXT: mv t0, a3 +; CHECK-NEXT: mv t0, a2 ; CHECK-NEXT: vsetvli t1, zero, e32, m1, ta, ma ; CHECK-NEXT: .LBB35_3: # %vector.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vl1re32.v v8, (a6) ; CHECK-NEXT: vl1re32.v v9, (a7) ; CHECK-NEXT: sub t0, t0, a4 -; CHECK-NEXT: add a7, a7, a2 +; CHECK-NEXT: add a7, a7, a3 ; CHECK-NEXT: vfmacc.vf v9, fa0, v8 ; CHECK-NEXT: vs1r.v v9, (a6) -; CHECK-NEXT: add a6, a6, a2 +; CHECK-NEXT: add a6, a6, a3 ; CHECK-NEXT: bnez t0, .LBB35_3 ; CHECK-NEXT: # %bb.4: # %middle.block ; CHECK-NEXT: beqz a5, .LBB35_7 ; CHECK-NEXT: .LBB35_5: # %for.body.preheader -; CHECK-NEXT: slli a2, a3, 2 +; CHECK-NEXT: slli a2, a2, 2 ; CHECK-NEXT: lui a3, 1 ; CHECK-NEXT: add a0, a0, a2 ; CHECK-NEXT: add a2, a1, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll index 62339130678d0..86cf1ee04b60a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/stepvector.ll @@ -561,14 +561,14 @@ define @add_stepvector_nxv16i64() { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vid.v v8 ; RV32-NEXT: slli a0, a0, 1 +; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vid.v v8 -; RV32-NEXT: vadd.vv v8, v8, v8 ; RV32-NEXT: vadd.vv v16, v8, v16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -596,16 +596,16 @@ define @mul_stepvector_nxv16i64() { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vid.v v8 +; RV32-NEXT: li a1, 3 +; RV32-NEXT: vmul.vx v8, v8, a1 ; RV32-NEXT: slli a1, a0, 1 ; RV32-NEXT: add a0, a1, a0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vid.v v8 -; RV32-NEXT: li a0, 3 -; RV32-NEXT: vmul.vx v8, v8, a0 ; RV32-NEXT: vadd.vv v16, v8, v16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -649,16 +649,16 @@ define @mul_bigimm_stepvector_nxv16i64() { ; RV32-NEXT: slli a3, a0, 1 ; RV32-NEXT: slli a0, a0, 6 ; RV32-NEXT: sub a0, a0, a3 +; RV32-NEXT: addi a3, sp, 8 +; RV32-NEXT: vsetvli a4, zero, e64, m8, ta, ma +; RV32-NEXT: vid.v v8 ; RV32-NEXT: add a0, a1, a0 -; 
RV32-NEXT: addi a1, sp, 8 +; RV32-NEXT: mv a1, sp ; RV32-NEXT: sw a2, 0(sp) ; RV32-NEXT: sw a0, 4(sp) -; RV32-NEXT: vsetvli a0, zero, e64, m8, ta, ma -; RV32-NEXT: vlse64.v v8, (a1), zero -; RV32-NEXT: mv a0, sp -; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vid.v v24 -; RV32-NEXT: vmul.vv v8, v24, v8 +; RV32-NEXT: vlse64.v v16, (a3), zero +; RV32-NEXT: vmul.vv v8, v8, v16 +; RV32-NEXT: vlse64.v v16, (a1), zero ; RV32-NEXT: vadd.vv v16, v8, v16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 @@ -689,14 +689,14 @@ define @shl_stepvector_nxv16i64() { ; RV32-NEXT: addi sp, sp, -16 ; RV32-NEXT: .cfi_def_cfa_offset 16 ; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma +; RV32-NEXT: vid.v v8 ; RV32-NEXT: slli a0, a0, 2 +; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw zero, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vid.v v8 -; RV32-NEXT: vsll.vi v8, v8, 2 ; RV32-NEXT: vadd.vv v16, v8, v16 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll index ecd098edb30ae..881a8795cc504 100644 --- a/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/strided-vpload.ll @@ -676,9 +676,9 @@ define @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv16f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, @strided_load_nxv17f64(ptr %ptr, i64 %stride, %v, ptr %ptr, i32 sig ; CHECK-NEXT: mul a4, a4, a1 ; CHECK-NEXT: srli a3, a3, 3 ; CHECK-NEXT: sltu a2, a2, a5 -; CHECK-NEXT: vsetvli a6, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v0, a3 +; CHECK-NEXT: add a0, a0, a4 ; CHECK-NEXT: addi a2, a2, -1 ; CHECK-NEXT: and a2, a2, a5 -; CHECK-NEXT: add a0, a0, a4 +; CHECK-NEXT: vsetvli a4, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v0, a3 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a0), a1, v0.t ; CHECK-NEXT: ret @@ -646,21 +646,21 @@ define void @strided_store_nxv17f64( %v, ptr %ptr, i32 sig ; CHECK-NEXT: sltu a3, a3, a6 ; CHECK-NEXT: addi t0, t0, -1 ; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: and t0, t0, a0 -; CHECK-NEXT: and a0, a3, a6 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload -; CHECK-NEXT: vsetvli zero, t0, e64, m8, ta, ma +; CHECK-NEXT: and a0, t0, a0 +; CHECK-NEXT: addi t0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (t0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v8, (a7), a2, v0.t +; CHECK-NEXT: and a0, a3, a6 ; CHECK-NEXT: bltu a0, a4, .LBB48_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a0, a4 ; CHECK-NEXT: .LBB48_6: ; CHECK-NEXT: mul a3, a5, a2 ; CHECK-NEXT: srli a4, a4, 2 -; CHECK-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v24, a4 ; CHECK-NEXT: add a1, a1, a3 +; CHECK-NEXT: vsetvli a3, zero, e8, mf2, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v24, a4 ; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma ; CHECK-NEXT: vsse64.v v16, (a1), a2, v0.t ; CHECK-NEXT: csrr a0, vlenb diff --git a/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll index 68e0c0089d0c7..a5dd27149c1f2 100644 --- a/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/umulo-sdnode.ll @@ 
-7,10 +7,10 @@ define @umulo_nxv1i8( %x, % ; CHECK-LABEL: umulo_nxv1i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma -; CHECK-NEXT: vmulhu.vv v10, v8, v9 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmul.vv v10, v8, v9 +; CHECK-NEXT: vmulhu.vv v8, v8, v9 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %a = call { , } @llvm.umul.with.overflow.nxv1i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -25,10 +25,10 @@ define @umulo_nxv2i8( %x, % ; CHECK-LABEL: umulo_nxv2i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmulhu.vv v10, v8, v9 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmul.vv v10, v8, v9 +; CHECK-NEXT: vmulhu.vv v8, v8, v9 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %a = call { , } @llvm.umul.with.overflow.nxv2i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -43,10 +43,10 @@ define @umulo_nxv4i8( %x, % ; CHECK-LABEL: umulo_nxv4i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmulhu.vv v10, v8, v9 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmul.vv v10, v8, v9 +; CHECK-NEXT: vmulhu.vv v8, v8, v9 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %a = call { , } @llvm.umul.with.overflow.nxv4i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -61,10 +61,10 @@ define @umulo_nxv8i8( %x, % ; CHECK-LABEL: umulo_nxv8i8: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; CHECK-NEXT: vmulhu.vv v10, v8, v9 -; CHECK-NEXT: vmsne.vi v0, v10, 0 -; CHECK-NEXT: vmul.vv v8, v8, v9 -; CHECK-NEXT: vmerge.vim v8, v8, 0, v0 +; CHECK-NEXT: vmul.vv v10, v8, v9 +; CHECK-NEXT: vmulhu.vv v8, v8, v9 +; CHECK-NEXT: vmsne.vi v0, v8, 0 +; CHECK-NEXT: vmerge.vim v8, v10, 0, v0 ; CHECK-NEXT: ret %a = call { , } @llvm.umul.with.overflow.nxv8i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -79,10 +79,10 @@ define @umulo_nxv16i8( %x, , } @llvm.umul.with.overflow.nxv16i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -97,10 +97,10 @@ define @umulo_nxv32i8( %x, , } @llvm.umul.with.overflow.nxv32i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -115,10 +115,10 @@ define @umulo_nxv64i8( %x, , } @llvm.umul.with.overflow.nxv64i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -133,10 +133,10 @@ define @umulo_nxv1i16( %x, , } @llvm.umul.with.overflow.nxv1i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -151,10 +151,10 @@ define @umulo_nxv2i16( %x, , } @llvm.umul.with.overflow.nxv2i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -169,10 +169,10 @@ define @umulo_nxv4i16( %x, , } @llvm.umul.with.overflow.nxv4i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -187,10 +187,10 @@ define @umulo_nxv8i16( %x, , } @llvm.umul.with.overflow.nxv8i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -205,10 +205,10 @@ define @umulo_nxv16i16( %x, , } @llvm.umul.with.overflow.nxv16i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -223,10 +223,10 @@ define @umulo_nxv32i16( %x, , } @llvm.umul.with.overflow.nxv32i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -241,10 +241,10 @@ define @umulo_nxv1i32( %x, , } @llvm.umul.with.overflow.nxv1i32( %x, %y) %b = extractvalue { , } %a, 0 @@ -259,10 +259,10 @@ define @umulo_nxv2i32( %x, , } @llvm.umul.with.overflow.nxv2i32( %x, %y) %b = extractvalue { , } %a, 0 @@ -277,10 +277,10 @@ 
define @umulo_nxv4i32( %x, , } @llvm.umul.with.overflow.nxv4i32( %x, %y) %b = extractvalue { , } %a, 0 @@ -295,10 +295,10 @@ define @umulo_nxv8i32( %x, , } @llvm.umul.with.overflow.nxv8i32( %x, %y) %b = extractvalue { , } %a, 0 @@ -313,10 +313,10 @@ define @umulo_nxv16i32( %x, , } @llvm.umul.with.overflow.nxv16i32( %x, %y) %b = extractvalue { , } %a, 0 @@ -331,10 +331,10 @@ define @umulo_nxv1i64( %x, , } @llvm.umul.with.overflow.nxv1i64( %x, %y) %b = extractvalue { , } %a, 0 @@ -349,10 +349,10 @@ define @umulo_nxv2i64( %x, , } @llvm.umul.with.overflow.nxv2i64( %x, %y) %b = extractvalue { , } %a, 0 @@ -367,10 +367,10 @@ define @umulo_nxv4i64( %x, , } @llvm.umul.with.overflow.nxv4i64( %x, %y) %b = extractvalue { , } %a, 0 @@ -385,10 +385,10 @@ define @umulo_nxv8i64( %x, , } @llvm.umul.with.overflow.nxv8i64( %x, %y) %b = extractvalue { , } %a, 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll index 0bd82e654e021..2c89e939940b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/urem-seteq-vec.ll @@ -10,13 +10,13 @@ define @test_urem_vec_even_divisor_eq0( %x) ; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; RV32-NEXT: vmul.vx v8, v8, a0 ; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, -1366 ; RV32-NEXT: vsll.vi v9, v8, 15 ; RV32-NEXT: vsrl.vi v8, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: addi a0, a0, -1366 ; RV32-NEXT: vmsgtu.vx v0, v8, a0 -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, -1, v0 +; RV32-NEXT: vmerge.vim v8, v9, -1, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: test_urem_vec_even_divisor_eq0: @@ -26,13 +26,13 @@ define @test_urem_vec_even_divisor_eq0( %x) ; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addi a0, a0, -1366 ; RV64-NEXT: vsll.vi v9, v8, 15 ; RV64-NEXT: vsrl.vi v8, v8, 1 ; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vmv.v.i v9, 0 +; RV64-NEXT: addi a0, a0, -1366 ; RV64-NEXT: vmsgtu.vx v0, v8, a0 -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vim v8, v8, -1, v0 +; RV64-NEXT: vmerge.vim v8, v9, -1, v0 ; RV64-NEXT: ret %urem = urem %x, splat (i16 6) %cmp = icmp ne %urem, splat (i16 0) @@ -48,10 +48,10 @@ define @test_urem_vec_odd_divisor_eq0( %x) ; RV32-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; RV32-NEXT: vmul.vx v8, v8, a0 ; RV32-NEXT: lui a0, 3 +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: addi a0, a0, 819 ; RV32-NEXT: vmsgtu.vx v0, v8, a0 -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, -1, v0 +; RV32-NEXT: vmerge.vim v8, v9, -1, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: test_urem_vec_odd_divisor_eq0: @@ -61,10 +61,10 @@ define @test_urem_vec_odd_divisor_eq0( %x) ; RV64-NEXT: vsetvli a1, zero, e16, mf4, ta, ma ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: lui a0, 3 +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: addi a0, a0, 819 ; RV64-NEXT: vmsgtu.vx v0, v8, a0 -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vim v8, v8, -1, v0 +; RV64-NEXT: vmerge.vim v8, v9, -1, v0 ; RV64-NEXT: ret %urem = urem %x, splat (i16 5) %cmp = icmp ne %urem, splat (i16 0) @@ -82,13 +82,13 @@ define @test_urem_vec_even_divisor_eq1( %x) ; RV32-NEXT: addi a0, a0, -1365 ; RV32-NEXT: vmul.vx v8, v8, a0 ; RV32-NEXT: lui a0, 3 -; RV32-NEXT: addi a0, a0, -1366 ; RV32-NEXT: vsll.vi v9, v8, 15 ; RV32-NEXT: vsrl.vi v8, v8, 1 ; RV32-NEXT: vor.vv v8, v8, v9 +; RV32-NEXT: vmv.v.i v9, 0 +; RV32-NEXT: addi a0, a0, -1366 ; RV32-NEXT: vmsgtu.vx v0, v8, a0 -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: 
vmerge.vim v8, v8, -1, v0 +; RV32-NEXT: vmerge.vim v8, v9, -1, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: test_urem_vec_even_divisor_eq1: @@ -100,13 +100,13 @@ define @test_urem_vec_even_divisor_eq1( %x) ; RV64-NEXT: addi a0, a0, -1365 ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: lui a0, 3 -; RV64-NEXT: addi a0, a0, -1366 ; RV64-NEXT: vsll.vi v9, v8, 15 ; RV64-NEXT: vsrl.vi v8, v8, 1 ; RV64-NEXT: vor.vv v8, v8, v9 +; RV64-NEXT: vmv.v.i v9, 0 +; RV64-NEXT: addi a0, a0, -1366 ; RV64-NEXT: vmsgtu.vx v0, v8, a0 -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vim v8, v8, -1, v0 +; RV64-NEXT: vmerge.vim v8, v9, -1, v0 ; RV64-NEXT: ret %urem = urem %x, splat (i16 6) %cmp = icmp ne %urem, splat (i16 1) @@ -124,10 +124,10 @@ define @test_urem_vec_odd_divisor_eq1( %x) ; RV32-NEXT: addi a0, a0, -819 ; RV32-NEXT: vmul.vx v8, v8, a0 ; RV32-NEXT: lui a0, 3 +; RV32-NEXT: vmv.v.i v9, 0 ; RV32-NEXT: addi a0, a0, 818 ; RV32-NEXT: vmsgtu.vx v0, v8, a0 -; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, -1, v0 +; RV32-NEXT: vmerge.vim v8, v9, -1, v0 ; RV32-NEXT: ret ; ; RV64-LABEL: test_urem_vec_odd_divisor_eq1: @@ -139,10 +139,10 @@ define @test_urem_vec_odd_divisor_eq1( %x) ; RV64-NEXT: addi a0, a0, -819 ; RV64-NEXT: vmul.vx v8, v8, a0 ; RV64-NEXT: lui a0, 3 +; RV64-NEXT: vmv.v.i v9, 0 ; RV64-NEXT: addi a0, a0, 818 ; RV64-NEXT: vmsgtu.vx v0, v8, a0 -; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vim v8, v8, -1, v0 +; RV64-NEXT: vmerge.vim v8, v9, -1, v0 ; RV64-NEXT: ret %urem = urem %x, splat (i16 5) %cmp = icmp ne %urem, splat (i16 1) diff --git a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll index 77f3cf3ca4980..cd1609f90c6b7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vadd-vp.ll @@ -1442,12 +1442,11 @@ define @vadd_vi_nxv32i32_evl_nx16( %va, < ; RV64-LABEL: vadd_vi_nxv32i32_evl_nx16: ; RV64: # %bb.0: ; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma +; RV64-NEXT: vadd.vi v8, v8, -1, v0.t ; RV64-NEXT: srli a0, a0, 2 ; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v24, v0, a0 -; RV64-NEXT: vsetvli a0, zero, e32, m8, ta, ma -; RV64-NEXT: vadd.vi v8, v8, -1, v0.t -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vslidedown.vx v0, v0, a0 ; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vadd.vi v16, v16, -1, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll index 9f0b2b3914836..6e9826b2fcdb3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll @@ -204,19 +204,19 @@ define {<2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>, <2 x i16>} @vector_deinterle ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v14, v8, 8 ; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a2, a1, a1 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v11, v10, a1 ; CHECK-NEXT: vslideup.vx v8, v12, a1 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: add a2, a0, a0 +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v11, a0 -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v14 -; CHECK-NEXT: vs2r.v v8, (a0) -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v8, (a0) +; CHECK-NEXT: vs2r.v v8, (a1) 
+; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vlseg5e16.v v8, (a1) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, a0 @@ -576,19 +576,19 @@ define {<2 x half>, <2 x half>, <2 x half>, <2 x half>, <2 x half>} @vector_dein ; CHECK-NEXT: vsetivli zero, 2, e16, m2, ta, ma ; CHECK-NEXT: vslidedown.vi v14, v8, 8 ; CHECK-NEXT: srli a1, a0, 3 -; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: add a2, a1, a1 ; CHECK-NEXT: vsetvli zero, a2, e16, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v11, v10, a1 ; CHECK-NEXT: vslideup.vx v8, v12, a1 -; CHECK-NEXT: add a1, a0, a0 -; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: srli a0, a0, 2 +; CHECK-NEXT: add a2, a0, a0 +; CHECK-NEXT: vsetvli zero, a2, e16, m1, ta, ma ; CHECK-NEXT: vslideup.vx v8, v11, a0 -; CHECK-NEXT: addi a0, sp, 16 ; CHECK-NEXT: vmv1r.v v9, v14 -; CHECK-NEXT: vs2r.v v8, (a0) -; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v8, (a0) +; CHECK-NEXT: vs2r.v v8, (a1) +; CHECK-NEXT: vsetvli a0, zero, e16, mf4, ta, ma +; CHECK-NEXT: vlseg5e16.v v8, (a1) ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll index 14f306da21dba..55359e82e9720 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-load.ll @@ -12,11 +12,12 @@ define {, } @vector_deinterleave_load_nxv16i ; CHECK-NEXT: vsetvli a1, zero, e8, m4, ta, ma ; CHECK-NEXT: vlm.v v8, (a0) ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, ma +; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: vslidedown.vx v0, v8, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 ; CHECK-NEXT: vmerge.vim v14, v10, 1, v0 ; CHECK-NEXT: vmv1r.v v0, v8 ; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 @@ -134,44 +135,62 @@ define {, } @vector_deinterleave_load_nxv8i6 ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 +; CHECK-NEXT: slli a2, a1, 4 +; CHECK-NEXT: add a1, a2, a1 ; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb ; CHECK-NEXT: li a1, 85 ; CHECK-NEXT: vsetvli a2, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v16, a1 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: csrr a2, vlenb +; CHECK-NEXT: slli a2, a2, 3 +; CHECK-NEXT: add a2, sp, a2 +; CHECK-NEXT: addi a2, a2, 16 +; CHECK-NEXT: vs8r.v v24, (a2) # Unknown-size Folded Spill ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a0, a0, a1 ; CHECK-NEXT: li a1, 170 ; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: vmv.v.x v17, a1 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma ; CHECK-NEXT: vcompress.vm v8, v24, v16 ; CHECK-NEXT: vmv1r.v v12, v16 -; CHECK-NEXT: vmv1r.v v13, v17 -; CHECK-NEXT: vcompress.vm v16, v24, v13 -; CHECK-NEXT: vcompress.vm v24, v0, v12 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: 
vcompress.vm v24, v0, v13 +; CHECK-NEXT: vcompress.vm v16, v0, v12 +; CHECK-NEXT: vmv4r.v v12, v16 +; CHECK-NEXT: vsetvli a0, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v16, a1 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v24, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v12, v24 +; CHECK-NEXT: vs1r.v v16, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v20, v24 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vcompress.vm v16, v0, v24 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v20, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v0, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vcompress.vm v24, v0, v20 +; CHECK-NEXT: vmv4r.v v20, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll index 81b6de9e662d5..ea1a6fe03501b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave.ll @@ -10,15 +10,14 @@ define {, } @vector_deinterleave_nxv16i1_nxv ; CHECK-LABEL: vector_deinterleave_nxv16i1_nxv32i1: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmv.v.i v10, 0 +; CHECK-NEXT: vmv.v.i v8, 0 ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: vmerge.vim v12, v8, 1, v0 ; CHECK-NEXT: srli a0, a0, 2 ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vslidedown.vx v8, v0, a0 +; CHECK-NEXT: vslidedown.vx v0, v0, a0 ; CHECK-NEXT: vsetvli a0, zero, e8, m2, ta, ma -; CHECK-NEXT: vmerge.vim v12, v10, 1, v0 -; CHECK-NEXT: vmv1r.v v0, v8 -; CHECK-NEXT: vmerge.vim v14, v10, 1, v0 +; CHECK-NEXT: vmerge.vim v14, v8, 1, v0 ; CHECK-NEXT: vnsrl.wi v8, v12, 0 ; CHECK-NEXT: vnsrl.wi v10, v12, 8 ; CHECK-NEXT: vmsne.vi v0, v8, 0 @@ -179,41 +178,63 @@ define {, } @vector_deinterleave_nxv8i64_nxv ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v7, a0 ; CHECK-NEXT: li a0, 170 
-; CHECK-NEXT: vmv.v.x v6, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vcompress.vm v24, v8, v7 ; CHECK-NEXT: vmv1r.v v28, v7 -; CHECK-NEXT: vmv1r.v v29, v6 -; CHECK-NEXT: vcompress.vm v0, v8, v29 -; CHECK-NEXT: vcompress.vm v8, v16, v28 -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vcompress.vm v8, v16, v29 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vcompress.vm v0, v16, v28 +; CHECK-NEXT: vmv4r.v v28, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v7, a0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v28, v8 +; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vcompress.vm v0, v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vcompress.vm v8, v16, v24 ; CHECK-NEXT: vmv4r.v v4, v8 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -401,41 +422,63 @@ define {, } @vector_deinterleave_nxv8f ; CHECK-NEXT: addi sp, sp, -16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: sub sp, sp, a0 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb +; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x11, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 17 * vlenb +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vs8r.v v16, (a0) # Unknown-size Folded Spill ; CHECK-NEXT: li a0, 85 ; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma ; CHECK-NEXT: vmv.v.x v7, a0 ; CHECK-NEXT: li a0, 170 -; CHECK-NEXT: vmv.v.x v6, a0 -; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; CHECK-NEXT: vcompress.vm v24, v8, v7 ; CHECK-NEXT: vmv1r.v v28, v7 -; CHECK-NEXT: vmv1r.v v29, v6 -; CHECK-NEXT: vcompress.vm v0, v8, v29 -; CHECK-NEXT: vcompress.vm v8, v16, v28 -; 
CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vcompress.vm v8, v16, v29 +; CHECK-NEXT: csrr a1, vlenb +; CHECK-NEXT: slli a1, a1, 3 +; CHECK-NEXT: add a1, sp, a1 +; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload +; CHECK-NEXT: vcompress.vm v0, v16, v28 +; CHECK-NEXT: vmv4r.v v28, v0 +; CHECK-NEXT: addi a1, sp, 16 +; CHECK-NEXT: vs8r.v v24, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: vsetvli a1, zero, e8, mf8, ta, ma +; CHECK-NEXT: vmv.v.x v7, a0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 +; CHECK-NEXT: slli a0, a0, 4 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vs8r.v v8, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: vmv4r.v v28, v8 +; CHECK-NEXT: vs1r.v v7, (a0) # Unknown-size Folded Spill +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vsetvli a0, zero, e64, m8, ta, ma +; CHECK-NEXT: vcompress.vm v0, v8, v24 +; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: add a0, sp, a0 +; CHECK-NEXT: addi a0, a0, 16 +; CHECK-NEXT: vl1r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 3 ; CHECK-NEXT: add a0, sp, a0 ; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vcompress.vm v8, v16, v24 ; CHECK-NEXT: vmv4r.v v4, v8 -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vmv8r.v v16, v0 ; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 +; CHECK-NEXT: slli a1, a0, 4 +; CHECK-NEXT: add a0, a1, a0 ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 ; CHECK-NEXT: addi sp, sp, 16 @@ -578,41 +621,41 @@ define {, , , , , , , , , , , , , , , , , , , , , , , , , , , , , , %data, <16 x i8> %mask, i8 %passthru) { ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, mu ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vcpop.m a1, v0 ; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: vcpop.m a1, v0 ; CHECK-NEXT: beqz a1, .LBB0_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 @@ -31,8 +31,8 @@ define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) { ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf2, ta, mu ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vcpop.m a1, v0 ; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: vcpop.m a1, v0 ; CHECK-NEXT: beqz a1, .LBB1_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 @@ -55,8 +55,8 @@ define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) { ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vcpop.m a1, v0 ; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: vcpop.m a1, v0 ; CHECK-NEXT: beqz a1, .LBB2_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 @@ -79,8 +79,8 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; RV32-NEXT: vmsne.vi v0, v9, 0 ; RV32-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: vcpop.m a2, v0 ; RV32-NEXT: vid.v v9, v0.t +; RV32-NEXT: vcpop.m a2, v0 
; RV32-NEXT: beqz a2, .LBB3_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: vredmaxu.vs v9, v9, v9 @@ -102,8 +102,8 @@ define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) { ; RV64-NEXT: vmsne.vi v0, v9, 0 ; RV64-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; RV64-NEXT: vmv.v.i v9, 0 -; RV64-NEXT: vcpop.m a1, v0 ; RV64-NEXT: vid.v v9, v0.t +; RV64-NEXT: vcpop.m a1, v0 ; RV64-NEXT: beqz a1, .LBB3_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: vredmaxu.vs v9, v9, v9 @@ -126,8 +126,8 @@ define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %pass ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, mu ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB4_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 @@ -150,8 +150,8 @@ define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double % ; CHECK-NEXT: vmsne.vi v0, v9, 0 ; CHECK-NEXT: vsetvli zero, zero, e8, mf8, ta, mu ; CHECK-NEXT: vmv.v.i v9, 0 -; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: vid.v v9, v0.t +; CHECK-NEXT: vcpop.m a0, v0 ; CHECK-NEXT: beqz a0, .LBB5_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vredmaxu.vs v9, v9, v9 @@ -172,8 +172,8 @@ define i8 @extract_last_i8_scalable( %data, ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e8, m2, ta, mu ; CHECK-NEXT: vmv.v.i v10, 0 -; CHECK-NEXT: vcpop.m a1, v0 ; CHECK-NEXT: vid.v v10, v0.t +; CHECK-NEXT: vcpop.m a1, v0 ; CHECK-NEXT: beqz a1, .LBB6_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: vredmaxu.vs v10, v10, v10 @@ -193,8 +193,8 @@ define i16 @extract_last_i16_scalable( %data, %data, %data, %data, %data, %data, @vector_interleave5_v10i16_v2i16(<2 x i16> %a, <2 x i16> %b, < ; CHECK-NEXT: vsetvli a4, zero, e16, mf4, ta, ma ; CHECK-NEXT: vsseg5e16.v v8, (a0) ; CHECK-NEXT: add a4, a3, a1 +; CHECK-NEXT: add a1, a4, a1 ; CHECK-NEXT: vle16.v v9, (a2) ; CHECK-NEXT: vle16.v v10, (a4) ; CHECK-NEXT: vle16.v v11, (a3) ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: add a1, a4, a1 ; CHECK-NEXT: vle16.v v12, (a1) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v11, v10, 2 @@ -241,11 +241,11 @@ define <10 x i16> @vector_interleave5_v10i16_v2i16(<2 x i16> %a, <2 x i16> %b, < ; ZVBB-NEXT: vsetvli a4, zero, e16, mf4, ta, ma ; ZVBB-NEXT: vsseg5e16.v v8, (a0) ; ZVBB-NEXT: add a4, a3, a1 +; ZVBB-NEXT: add a1, a4, a1 ; ZVBB-NEXT: vle16.v v9, (a2) ; ZVBB-NEXT: vle16.v v10, (a4) ; ZVBB-NEXT: vle16.v v11, (a3) ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: add a1, a4, a1 ; ZVBB-NEXT: vle16.v v12, (a1) ; ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVBB-NEXT: vslideup.vi v11, v10, 2 @@ -283,24 +283,24 @@ define <14 x i8> @vector_interleave7_v14i8_v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i ; CHECK-NEXT: vsseg7e8.v v8, (a0) ; CHECK-NEXT: vle8.v v9, (a4) ; CHECK-NEXT: add a4, a4, a1 -; CHECK-NEXT: vle8.v v10, (a2) -; CHECK-NEXT: add a2, a4, a1 -; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: vle8.v v10, (a3) +; CHECK-NEXT: add a3, a4, a1 ; CHECK-NEXT: vle8.v v11, (a2) -; CHECK-NEXT: vle8.v v12, (a4) +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: vle8.v v12, (a3) +; CHECK-NEXT: vle8.v v13, (a4) ; CHECK-NEXT: vle8.v v8, (a0) -; CHECK-NEXT: vle8.v v13, (a1) -; CHECK-NEXT: vle8.v v14, (a3) +; CHECK-NEXT: vle8.v v14, (a1) ; CHECK-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v12, v11, 2 -; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vslideup.vi v13, v12, 2 +; CHECK-NEXT: vslideup.vi v8, v11, 2 ; CHECK-NEXT: vsetivli zero, 6, 
e8, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v12, v13, 4 -; CHECK-NEXT: vslideup.vi v8, v14, 4 +; CHECK-NEXT: vslideup.vi v13, v14, 4 +; CHECK-NEXT: vslideup.vi v8, v10, 4 ; CHECK-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 6 ; CHECK-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 8 +; CHECK-NEXT: vslideup.vi v8, v13, 8 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: add sp, sp, a0 ; CHECK-NEXT: .cfi_def_cfa sp, 16 @@ -325,24 +325,24 @@ define <14 x i8> @vector_interleave7_v14i8_v2i8(<2 x i8> %a, <2 x i8> %b, <2 x i ; ZVBB-NEXT: vsseg7e8.v v8, (a0) ; ZVBB-NEXT: vle8.v v9, (a4) ; ZVBB-NEXT: add a4, a4, a1 -; ZVBB-NEXT: vle8.v v10, (a2) -; ZVBB-NEXT: add a2, a4, a1 -; ZVBB-NEXT: add a1, a2, a1 +; ZVBB-NEXT: vle8.v v10, (a3) +; ZVBB-NEXT: add a3, a4, a1 ; ZVBB-NEXT: vle8.v v11, (a2) -; ZVBB-NEXT: vle8.v v12, (a4) +; ZVBB-NEXT: add a1, a3, a1 +; ZVBB-NEXT: vle8.v v12, (a3) +; ZVBB-NEXT: vle8.v v13, (a4) ; ZVBB-NEXT: vle8.v v8, (a0) -; ZVBB-NEXT: vle8.v v13, (a1) -; ZVBB-NEXT: vle8.v v14, (a3) +; ZVBB-NEXT: vle8.v v14, (a1) ; ZVBB-NEXT: vsetivli zero, 4, e8, mf2, tu, ma -; ZVBB-NEXT: vslideup.vi v12, v11, 2 -; ZVBB-NEXT: vslideup.vi v8, v10, 2 +; ZVBB-NEXT: vslideup.vi v13, v12, 2 +; ZVBB-NEXT: vslideup.vi v8, v11, 2 ; ZVBB-NEXT: vsetivli zero, 6, e8, mf2, tu, ma -; ZVBB-NEXT: vslideup.vi v12, v13, 4 -; ZVBB-NEXT: vslideup.vi v8, v14, 4 +; ZVBB-NEXT: vslideup.vi v13, v14, 4 +; ZVBB-NEXT: vslideup.vi v8, v10, 4 ; ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma ; ZVBB-NEXT: vslideup.vi v8, v9, 6 ; ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma -; ZVBB-NEXT: vslideup.vi v8, v12, 8 +; ZVBB-NEXT: vslideup.vi v8, v13, 8 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: add sp, sp, a0 ; ZVBB-NEXT: .cfi_def_cfa sp, 16 @@ -579,11 +579,11 @@ define <10 x half> @vector_interleave5_v10f16_v2f16(<2 x half> %a, <2 x half> %b ; CHECK-NEXT: vsetvli a4, zero, e16, mf4, ta, ma ; CHECK-NEXT: vsseg5e16.v v8, (a0) ; CHECK-NEXT: add a4, a3, a1 +; CHECK-NEXT: add a1, a4, a1 ; CHECK-NEXT: vle16.v v9, (a2) ; CHECK-NEXT: vle16.v v10, (a4) ; CHECK-NEXT: vle16.v v11, (a3) ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: add a1, a4, a1 ; CHECK-NEXT: vle16.v v12, (a1) ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v11, v10, 2 @@ -616,11 +616,11 @@ define <10 x half> @vector_interleave5_v10f16_v2f16(<2 x half> %a, <2 x half> %b ; ZVBB-NEXT: vsetvli a4, zero, e16, mf4, ta, ma ; ZVBB-NEXT: vsseg5e16.v v8, (a0) ; ZVBB-NEXT: add a4, a3, a1 +; ZVBB-NEXT: add a1, a4, a1 ; ZVBB-NEXT: vle16.v v9, (a2) ; ZVBB-NEXT: vle16.v v10, (a4) ; ZVBB-NEXT: vle16.v v11, (a3) ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: add a1, a4, a1 ; ZVBB-NEXT: vle16.v v12, (a1) ; ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVBB-NEXT: vslideup.vi v11, v10, 2 @@ -659,24 +659,24 @@ define <7 x half> @vector_interleave7_v7f16_v1f16(<1 x half> %a, <1 x half> %b, ; CHECK-NEXT: vsseg7e16.v v8, (a0) ; CHECK-NEXT: vle16.v v9, (a4) ; CHECK-NEXT: add a4, a4, a1 -; CHECK-NEXT: vle16.v v10, (a2) -; CHECK-NEXT: add a2, a4, a1 -; CHECK-NEXT: add a1, a2, a1 +; CHECK-NEXT: vle16.v v10, (a3) +; CHECK-NEXT: add a3, a4, a1 ; CHECK-NEXT: vle16.v v11, (a2) -; CHECK-NEXT: vle16.v v12, (a4) +; CHECK-NEXT: add a1, a3, a1 +; CHECK-NEXT: vle16.v v12, (a3) +; CHECK-NEXT: vle16.v v13, (a4) ; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vle16.v v13, (a1) -; CHECK-NEXT: vle16.v v14, (a3) +; CHECK-NEXT: vle16.v v14, (a1) ; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v12, v11, 1 -; CHECK-NEXT: 
vslideup.vi v8, v10, 1 +; CHECK-NEXT: vslideup.vi v13, v12, 1 +; CHECK-NEXT: vslideup.vi v8, v11, 1 ; CHECK-NEXT: vsetivli zero, 3, e16, mf2, tu, ma -; CHECK-NEXT: vslideup.vi v12, v13, 2 -; CHECK-NEXT: vslideup.vi v8, v14, 2 +; CHECK-NEXT: vslideup.vi v13, v14, 2 +; CHECK-NEXT: vslideup.vi v8, v10, 2 ; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 3 ; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v12, 4 +; CHECK-NEXT: vslideup.vi v8, v13, 4 ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 1 ; CHECK-NEXT: add sp, sp, a0 @@ -703,24 +703,24 @@ define <7 x half> @vector_interleave7_v7f16_v1f16(<1 x half> %a, <1 x half> %b, ; ZVBB-NEXT: vsseg7e16.v v8, (a0) ; ZVBB-NEXT: vle16.v v9, (a4) ; ZVBB-NEXT: add a4, a4, a1 -; ZVBB-NEXT: vle16.v v10, (a2) -; ZVBB-NEXT: add a2, a4, a1 -; ZVBB-NEXT: add a1, a2, a1 +; ZVBB-NEXT: vle16.v v10, (a3) +; ZVBB-NEXT: add a3, a4, a1 ; ZVBB-NEXT: vle16.v v11, (a2) -; ZVBB-NEXT: vle16.v v12, (a4) +; ZVBB-NEXT: add a1, a3, a1 +; ZVBB-NEXT: vle16.v v12, (a3) +; ZVBB-NEXT: vle16.v v13, (a4) ; ZVBB-NEXT: vle16.v v8, (a0) -; ZVBB-NEXT: vle16.v v13, (a1) -; ZVBB-NEXT: vle16.v v14, (a3) +; ZVBB-NEXT: vle16.v v14, (a1) ; ZVBB-NEXT: vsetivli zero, 2, e16, mf2, tu, ma -; ZVBB-NEXT: vslideup.vi v12, v11, 1 -; ZVBB-NEXT: vslideup.vi v8, v10, 1 +; ZVBB-NEXT: vslideup.vi v13, v12, 1 +; ZVBB-NEXT: vslideup.vi v8, v11, 1 ; ZVBB-NEXT: vsetivli zero, 3, e16, mf2, tu, ma -; ZVBB-NEXT: vslideup.vi v12, v13, 2 -; ZVBB-NEXT: vslideup.vi v8, v14, 2 +; ZVBB-NEXT: vslideup.vi v13, v14, 2 +; ZVBB-NEXT: vslideup.vi v8, v10, 2 ; ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma ; ZVBB-NEXT: vslideup.vi v8, v9, 3 ; ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; ZVBB-NEXT: vslideup.vi v8, v12, 4 +; ZVBB-NEXT: vslideup.vi v8, v13, 4 ; ZVBB-NEXT: csrr a0, vlenb ; ZVBB-NEXT: slli a0, a0, 1 ; ZVBB-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll index 6aa62c2256925..53ec22f361254 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave.ll @@ -124,9 +124,9 @@ define @vector_interleave_nxv4i64_nxv2i64( ; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; CHECK-NEXT: vid.v v12 ; CHECK-NEXT: srli a0, a0, 2 -; CHECK-NEXT: vand.vi v13, v12, 1 -; CHECK-NEXT: vmsne.vi v0, v13, 0 ; CHECK-NEXT: vsrl.vi v16, v12, 1 +; CHECK-NEXT: vand.vi v12, v12, 1 +; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: vadd.vx v16, v16, a0, v0.t ; CHECK-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 @@ -139,9 +139,9 @@ define @vector_interleave_nxv4i64_nxv2i64( ; ZVBB-NEXT: vsetvli a1, zero, e16, m1, ta, mu ; ZVBB-NEXT: vid.v v12 ; ZVBB-NEXT: srli a0, a0, 2 -; ZVBB-NEXT: vand.vi v13, v12, 1 -; ZVBB-NEXT: vmsne.vi v0, v13, 0 ; ZVBB-NEXT: vsrl.vi v16, v12, 1 +; ZVBB-NEXT: vand.vi v12, v12, 1 +; ZVBB-NEXT: vmsne.vi v0, v12, 0 ; ZVBB-NEXT: vadd.vx v16, v16, a0, v0.t ; ZVBB-NEXT: vsetvli zero, zero, e64, m4, ta, ma ; ZVBB-NEXT: vrgatherei16.vv v12, v8, v16 @@ -287,13 +287,13 @@ define @vector_interleave_nxv16i64_nxv8i64( @vector_interleave_nxv16i64_nxv8i64( @llvm.vector.interleave2.nxv16i64( %a, %b) @@ -527,9 +527,9 @@ define @vector_interleave_nxv4f64_nxv2f64( @vector_interleave_nxv4f64_nxv2f64( @vector_interleave_nxv16f64_nxv8f64( @vector_interleave_nxv16f64_nxv8f64( @llvm.vector.interleave2.nxv16f64( %a, %b) @@ -745,12 +745,12 @@ define @vector_interleave_nxv48i1_nxv16i1( ; CHECK-NEXT: srli 
a2, a1, 2 ; CHECK-NEXT: srli a1, a1, 1 ; CHECK-NEXT: vl2r.v v10, (a3) +; CHECK-NEXT: add a3, a2, a2 ; CHECK-NEXT: vl2r.v v12, (a0) -; CHECK-NEXT: add a0, a2, a2 ; CHECK-NEXT: vmsne.vi v14, v8, 0 ; CHECK-NEXT: vmsne.vi v8, v10, 0 ; CHECK-NEXT: vmsne.vi v0, v12, 0 -; CHECK-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, a3, e8, mf2, ta, ma ; CHECK-NEXT: vslideup.vx v0, v8, a2 ; CHECK-NEXT: add a0, a1, a1 ; CHECK-NEXT: vsetvli zero, a0, e8, m1, ta, ma @@ -788,12 +788,12 @@ define @vector_interleave_nxv48i1_nxv16i1( ; ZVBB-NEXT: srli a2, a1, 2 ; ZVBB-NEXT: srli a1, a1, 1 ; ZVBB-NEXT: vl2r.v v10, (a3) +; ZVBB-NEXT: add a3, a2, a2 ; ZVBB-NEXT: vl2r.v v12, (a0) -; ZVBB-NEXT: add a0, a2, a2 ; ZVBB-NEXT: vmsne.vi v14, v8, 0 ; ZVBB-NEXT: vmsne.vi v8, v10, 0 ; ZVBB-NEXT: vmsne.vi v0, v12, 0 -; ZVBB-NEXT: vsetvli zero, a0, e8, mf2, ta, ma +; ZVBB-NEXT: vsetvli zero, a3, e8, mf2, ta, ma ; ZVBB-NEXT: vslideup.vx v0, v8, a2 ; ZVBB-NEXT: add a0, a1, a1 ; ZVBB-NEXT: vsetvli zero, a0, e8, m1, ta, ma @@ -1045,12 +1045,12 @@ define @vector_interleave_nxv80i1_nxv16i1( ; CHECK-NEXT: vmv2r.v v20, v14 ; CHECK-NEXT: vmv1r.v v0, v9 ; CHECK-NEXT: vmerge.vim v16, v12, 1, v0 +; CHECK-NEXT: add a5, a2, a1 ; CHECK-NEXT: vmv1r.v v21, v18 ; CHECK-NEXT: vmv1r.v v0, v10 ; CHECK-NEXT: vmerge.vim v8, v12, 1, v0 ; CHECK-NEXT: vmv1r.v v22, v16 ; CHECK-NEXT: vmv1r.v v16, v19 -; CHECK-NEXT: add a5, a2, a1 ; CHECK-NEXT: vmv1r.v v23, v8 ; CHECK-NEXT: vmv1r.v v18, v9 ; CHECK-NEXT: vmv1r.v v0, v11 @@ -1121,12 +1121,12 @@ define @vector_interleave_nxv80i1_nxv16i1( ; ZVBB-NEXT: vmv2r.v v20, v14 ; ZVBB-NEXT: vmv1r.v v0, v9 ; ZVBB-NEXT: vmerge.vim v16, v12, 1, v0 +; ZVBB-NEXT: add a5, a2, a1 ; ZVBB-NEXT: vmv1r.v v21, v18 ; ZVBB-NEXT: vmv1r.v v0, v10 ; ZVBB-NEXT: vmerge.vim v8, v12, 1, v0 ; ZVBB-NEXT: vmv1r.v v22, v16 ; ZVBB-NEXT: vmv1r.v v16, v19 -; ZVBB-NEXT: add a5, a2, a1 ; ZVBB-NEXT: vmv1r.v v23, v8 ; ZVBB-NEXT: vmv1r.v v18, v9 ; ZVBB-NEXT: vmv1r.v v0, v11 @@ -1192,26 +1192,26 @@ define @vector_interleave_nxv80i8_nxv16i8( ; RV32-NEXT: andi sp, sp, -64 ; RV32-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; RV32-NEXT: vmv2r.v v20, v16 -; RV32-NEXT: addi a0, sp, 64 ; RV32-NEXT: vmv2r.v v18, v12 +; RV32-NEXT: vmv2r.v v16, v8 +; RV32-NEXT: addi a0, sp, 64 ; RV32-NEXT: csrr a1, vlenb ; RV32-NEXT: slli a2, a1, 2 ; RV32-NEXT: add a1, a2, a1 ; RV32-NEXT: add a1, sp, a1 ; RV32-NEXT: addi a1, a1, 64 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: vmv2r.v v16, v8 +; RV32-NEXT: add a3, a0, a2 +; RV32-NEXT: add a4, a1, a2 ; RV32-NEXT: vmv2r.v v22, v16 ; RV32-NEXT: vmv2r.v v24, v18 ; RV32-NEXT: vmv1r.v v26, v20 -; RV32-NEXT: add a3, a0, a2 -; RV32-NEXT: vmv1r.v v23, v10 -; RV32-NEXT: add a4, a1, a2 ; RV32-NEXT: add a5, a4, a2 -; RV32-NEXT: vmv1r.v v25, v14 +; RV32-NEXT: vmv1r.v v23, v10 ; RV32-NEXT: add a6, a5, a2 -; RV32-NEXT: vmv1r.v v18, v11 +; RV32-NEXT: vmv1r.v v25, v14 ; RV32-NEXT: vsseg5e8.v v22, (a0) +; RV32-NEXT: vmv1r.v v18, v11 ; RV32-NEXT: vmv1r.v v20, v15 ; RV32-NEXT: vsseg5e8.v v17, (a1) ; RV32-NEXT: vl1r.v v16, (a6) @@ -1230,10 +1230,10 @@ define @vector_interleave_nxv80i8_nxv16i8( ; RV32-NEXT: add a0, sp, a0 ; RV32-NEXT: addi a0, a0, 64 ; RV32-NEXT: add a6, a6, a2 +; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: vl1r.v v15, (a5) ; RV32-NEXT: vl1r.v v12, (a6) ; RV32-NEXT: vl1r.v v13, (a1) -; RV32-NEXT: slli a2, a2, 3 ; RV32-NEXT: add a2, a0, a2 ; RV32-NEXT: vs2r.v v16, (a2) ; RV32-NEXT: vs8r.v v8, (a0) @@ -1258,26 +1258,26 @@ define @vector_interleave_nxv80i8_nxv16i8( ; RV64-NEXT: andi sp, sp, -64 ; RV64-NEXT: vsetvli a0, zero, e8, 
m1, ta, ma ; RV64-NEXT: vmv2r.v v20, v16 -; RV64-NEXT: addi a0, sp, 64 ; RV64-NEXT: vmv2r.v v18, v12 +; RV64-NEXT: vmv2r.v v16, v8 +; RV64-NEXT: addi a0, sp, 64 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a2, a1, 2 ; RV64-NEXT: add a1, a2, a1 ; RV64-NEXT: add a1, sp, a1 ; RV64-NEXT: addi a1, a1, 64 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: vmv2r.v v16, v8 +; RV64-NEXT: add a3, a0, a2 +; RV64-NEXT: add a4, a1, a2 ; RV64-NEXT: vmv2r.v v22, v16 ; RV64-NEXT: vmv2r.v v24, v18 ; RV64-NEXT: vmv1r.v v26, v20 -; RV64-NEXT: add a3, a0, a2 -; RV64-NEXT: vmv1r.v v23, v10 -; RV64-NEXT: add a4, a1, a2 ; RV64-NEXT: add a5, a4, a2 -; RV64-NEXT: vmv1r.v v25, v14 +; RV64-NEXT: vmv1r.v v23, v10 ; RV64-NEXT: add a6, a5, a2 -; RV64-NEXT: vmv1r.v v18, v11 +; RV64-NEXT: vmv1r.v v25, v14 ; RV64-NEXT: vsseg5e8.v v22, (a0) +; RV64-NEXT: vmv1r.v v18, v11 ; RV64-NEXT: vmv1r.v v20, v15 ; RV64-NEXT: vsseg5e8.v v17, (a1) ; RV64-NEXT: vl1r.v v16, (a6) @@ -1296,10 +1296,10 @@ define @vector_interleave_nxv80i8_nxv16i8( ; RV64-NEXT: add a0, sp, a0 ; RV64-NEXT: addi a0, a0, 64 ; RV64-NEXT: add a6, a6, a2 +; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: vl1r.v v15, (a5) ; RV64-NEXT: vl1r.v v12, (a6) ; RV64-NEXT: vl1r.v v13, (a1) -; RV64-NEXT: slli a2, a2, 3 ; RV64-NEXT: add a2, a0, a2 ; RV64-NEXT: vs2r.v v16, (a2) ; RV64-NEXT: vs8r.v v8, (a0) @@ -1324,26 +1324,26 @@ define @vector_interleave_nxv80i8_nxv16i8( ; ZVBB-RV32-NEXT: andi sp, sp, -64 ; ZVBB-RV32-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; ZVBB-RV32-NEXT: vmv2r.v v20, v16 -; ZVBB-RV32-NEXT: addi a0, sp, 64 ; ZVBB-RV32-NEXT: vmv2r.v v18, v12 +; ZVBB-RV32-NEXT: vmv2r.v v16, v8 +; ZVBB-RV32-NEXT: addi a0, sp, 64 ; ZVBB-RV32-NEXT: csrr a1, vlenb ; ZVBB-RV32-NEXT: slli a2, a1, 2 ; ZVBB-RV32-NEXT: add a1, a2, a1 ; ZVBB-RV32-NEXT: add a1, sp, a1 ; ZVBB-RV32-NEXT: addi a1, a1, 64 ; ZVBB-RV32-NEXT: csrr a2, vlenb -; ZVBB-RV32-NEXT: vmv2r.v v16, v8 +; ZVBB-RV32-NEXT: add a3, a0, a2 +; ZVBB-RV32-NEXT: add a4, a1, a2 ; ZVBB-RV32-NEXT: vmv2r.v v22, v16 ; ZVBB-RV32-NEXT: vmv2r.v v24, v18 ; ZVBB-RV32-NEXT: vmv1r.v v26, v20 -; ZVBB-RV32-NEXT: add a3, a0, a2 -; ZVBB-RV32-NEXT: vmv1r.v v23, v10 -; ZVBB-RV32-NEXT: add a4, a1, a2 ; ZVBB-RV32-NEXT: add a5, a4, a2 -; ZVBB-RV32-NEXT: vmv1r.v v25, v14 +; ZVBB-RV32-NEXT: vmv1r.v v23, v10 ; ZVBB-RV32-NEXT: add a6, a5, a2 -; ZVBB-RV32-NEXT: vmv1r.v v18, v11 +; ZVBB-RV32-NEXT: vmv1r.v v25, v14 ; ZVBB-RV32-NEXT: vsseg5e8.v v22, (a0) +; ZVBB-RV32-NEXT: vmv1r.v v18, v11 ; ZVBB-RV32-NEXT: vmv1r.v v20, v15 ; ZVBB-RV32-NEXT: vsseg5e8.v v17, (a1) ; ZVBB-RV32-NEXT: vl1r.v v16, (a6) @@ -1362,10 +1362,10 @@ define @vector_interleave_nxv80i8_nxv16i8( ; ZVBB-RV32-NEXT: add a0, sp, a0 ; ZVBB-RV32-NEXT: addi a0, a0, 64 ; ZVBB-RV32-NEXT: add a6, a6, a2 +; ZVBB-RV32-NEXT: slli a2, a2, 3 ; ZVBB-RV32-NEXT: vl1r.v v15, (a5) ; ZVBB-RV32-NEXT: vl1r.v v12, (a6) ; ZVBB-RV32-NEXT: vl1r.v v13, (a1) -; ZVBB-RV32-NEXT: slli a2, a2, 3 ; ZVBB-RV32-NEXT: add a2, a0, a2 ; ZVBB-RV32-NEXT: vs2r.v v16, (a2) ; ZVBB-RV32-NEXT: vs8r.v v8, (a0) @@ -1390,26 +1390,26 @@ define @vector_interleave_nxv80i8_nxv16i8( ; ZVBB-RV64-NEXT: andi sp, sp, -64 ; ZVBB-RV64-NEXT: vsetvli a0, zero, e8, m1, ta, ma ; ZVBB-RV64-NEXT: vmv2r.v v20, v16 -; ZVBB-RV64-NEXT: addi a0, sp, 64 ; ZVBB-RV64-NEXT: vmv2r.v v18, v12 +; ZVBB-RV64-NEXT: vmv2r.v v16, v8 +; ZVBB-RV64-NEXT: addi a0, sp, 64 ; ZVBB-RV64-NEXT: csrr a1, vlenb ; ZVBB-RV64-NEXT: slli a2, a1, 2 ; ZVBB-RV64-NEXT: add a1, a2, a1 ; ZVBB-RV64-NEXT: add a1, sp, a1 ; ZVBB-RV64-NEXT: addi a1, a1, 64 ; ZVBB-RV64-NEXT: csrr a2, vlenb -; ZVBB-RV64-NEXT: vmv2r.v 
v16, v8 +; ZVBB-RV64-NEXT: add a3, a0, a2 +; ZVBB-RV64-NEXT: add a4, a1, a2 ; ZVBB-RV64-NEXT: vmv2r.v v22, v16 ; ZVBB-RV64-NEXT: vmv2r.v v24, v18 ; ZVBB-RV64-NEXT: vmv1r.v v26, v20 -; ZVBB-RV64-NEXT: add a3, a0, a2 -; ZVBB-RV64-NEXT: vmv1r.v v23, v10 -; ZVBB-RV64-NEXT: add a4, a1, a2 ; ZVBB-RV64-NEXT: add a5, a4, a2 -; ZVBB-RV64-NEXT: vmv1r.v v25, v14 +; ZVBB-RV64-NEXT: vmv1r.v v23, v10 ; ZVBB-RV64-NEXT: add a6, a5, a2 -; ZVBB-RV64-NEXT: vmv1r.v v18, v11 +; ZVBB-RV64-NEXT: vmv1r.v v25, v14 ; ZVBB-RV64-NEXT: vsseg5e8.v v22, (a0) +; ZVBB-RV64-NEXT: vmv1r.v v18, v11 ; ZVBB-RV64-NEXT: vmv1r.v v20, v15 ; ZVBB-RV64-NEXT: vsseg5e8.v v17, (a1) ; ZVBB-RV64-NEXT: vl1r.v v16, (a6) @@ -1428,10 +1428,10 @@ define @vector_interleave_nxv80i8_nxv16i8( ; ZVBB-RV64-NEXT: add a0, sp, a0 ; ZVBB-RV64-NEXT: addi a0, a0, 64 ; ZVBB-RV64-NEXT: add a6, a6, a2 +; ZVBB-RV64-NEXT: slli a2, a2, 3 ; ZVBB-RV64-NEXT: vl1r.v v15, (a5) ; ZVBB-RV64-NEXT: vl1r.v v12, (a6) ; ZVBB-RV64-NEXT: vl1r.v v13, (a1) -; ZVBB-RV64-NEXT: slli a2, a2, 3 ; ZVBB-RV64-NEXT: add a2, a0, a2 ; ZVBB-RV64-NEXT: vs2r.v v16, (a2) ; ZVBB-RV64-NEXT: vs8r.v v8, (a0) @@ -1521,26 +1521,26 @@ define @vector_interleave_nxv20i32_nxv4i32( @vector_interleave_nxv20i32_nxv4i32( @vector_interleave_nxv20i32_nxv4i32( @vector_interleave_nxv20i32_nxv4i32( @vector_interleave_nxv20i32_nxv4i32( @vector_interleave_nxv20i32_nxv4i32( @vector_interleave_nxv20i32_nxv4i32( @vector_interleave_nxv20i32_nxv4i32( @vector_interleave_nxv10i64_nxv2i64( @vector_interleave_nxv10i64_nxv2i64( @vector_interleave_nxv10i64_nxv2i64( @vector_interleave_nxv10i64_nxv2i64( @vector_interleave_nxv10i64_nxv2i64( @vector_interleave_nxv10i64_nxv2i64( @vector_interleave_nxv10i64_nxv2i64( @vector_interleave_nxv10i64_nxv2i64( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i1_nxv16i1( @vector_interleave_nxv112i8_nxv16i8( @vector_interleave_nxv112i8_nxv16i8( @vector_interleave_nxv112i8_nxv16i8( @vector_interleave_nxv112i8_nxv16i8( @vector_interleave_nxv112i8_nxv16i8( @vector_interleave_nxv112i8_nxv16i8( @vector_interleave_nxv112i8_nxv16i8( @vector_interleave_nxv112i8_nxv16i8( @vector_interleave_nxv56i16_nxv8i16( @vector_interleave_nxv56i16_nxv8i16( @vector_interleave_nxv56i16_nxv8i16( @vector_interleave_nxv56i16_nxv8i16( @vector_interleave_nxv56i16_nxv8i16( @vector_interleave_nxv56i16_nxv8i16( @vector_interleave_nxv56i16_nxv8i16( @vector_interleave_nxv56i16_nxv8i16( @vector_interleave_nxv28i32_nxv4i32( @vector_interleave_nxv28i32_nxv4i32( @vector_interleave_nxv28i32_nxv4i32( @vector_interleave_nxv28i32_nxv4i32( @vector_interleave_nxv28i32_nxv4i32( @vector_interleave_nxv28i32_nxv4i32( @vector_interleave_nxv28i32_nxv4i32( @vector_interleave_nxv28i32_nxv4i32( @vector_interleave_nxv14i64_nxv2i64( @vector_interleave_nxv14i64_nxv2i64( @vector_interleave_nxv14i64_nxv2i64( @vector_interleave_nxv14i64_nxv2i64( @vector_interleave_nxv14i64_nxv2i64( @vector_interleave_nxv14i64_nxv2i64( @vector_interleave_nxv14i64_nxv2i64( @vector_interleave_nxv14i64_nxv2i64( @test2( %a, ; CHECK-NEXT: lui a1, %hi(.LCPI1_0) ; CHECK-NEXT: fld fa5, %lo(.LCPI1_0)(a1) ; CHECK-NEXT: lui a1, %hi(.LCPI1_1) -; CHECK-NEXT: fld fa4, %lo(.LCPI1_1)(a1) -; CHECK-NEXT: vsetvli a1, zero, e64, m1, ta, ma +; CHECK-NEXT: vsetvli a2, zero, e64, m1, ta, ma ; CHECK-NEXT: vfmv.v.f v9, fa5 +; CHECK-NEXT: fld fa5, %lo(.LCPI1_1)(a1) ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: vfadd.vf v9, v9, fa4, v0.t +; CHECK-NEXT: vfadd.vf v9, v9, fa5, v0.t ; 
CHECK-NEXT: vfmul.vv v8, v8, v9, v0.t ; CHECK-NEXT: ret %t = call @llvm.vp.fmul.nxv1f64( %a, splat (double 2.0), %m, i32 %evl) @@ -48,11 +48,11 @@ define @test3( %a, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv16f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: mv a3, a1 -; CHECK-NEXT: slli a1, a1, 1 -; CHECK-NEXT: add a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a5, a2, a3 -; CHECK-NEXT: vl8re64.v v8, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 3 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v0, (a5) ; CHECK-NEXT: sub a5, a4, a1 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v24, (a3) ; CHECK-NEXT: sltu a3, a4, a5 -; CHECK-NEXT: vl8re64.v v8, (a2) -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v0, (a0) ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a3, a3, a5 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v24, v16, v8 +; CHECK-NEXT: vfmadd.vv v24, v16, v0 +; CHECK-NEXT: vl8re64.v v0, (a2) +; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: bltu a4, a1, .LBB129_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB129_2: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: mv a1, a0 -; CHECK-NEXT: slli a0, a0, 1 -; CHECK-NEXT: add a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v0, v16, v8 -; CHECK-NEXT: vmv.v.v v8, v0 +; CHECK-NEXT: vfmadd.vv v16, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: vmv8r.v v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call @llvm.vp.fma.nxv16f64( %va, %b, %c, splat (i1 true), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll index 901f3cd63fa9e..432994de33321 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfmuladd-vp.ll @@ 
-1108,20 +1108,15 @@ define @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64( %va, @vfma_vv_nxv16f64_unmasked( %va, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: vfma_vv_nxv16f64_unmasked: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 5 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 32 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: li a3, 24 -; CHECK-NEXT: mul a1, a1, a3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: add a5, a2, a3 -; CHECK-NEXT: vl8re64.v v8, (a5) -; CHECK-NEXT: csrr a5, vlenb -; CHECK-NEXT: slli a5, a5, 3 -; CHECK-NEXT: add a5, sp, a5 -; CHECK-NEXT: addi a5, a5, 16 -; CHECK-NEXT: vs8r.v v8, (a5) # Unknown-size Folded Spill +; CHECK-NEXT: vl8re64.v v0, (a5) ; CHECK-NEXT: sub a5, a4, a1 ; CHECK-NEXT: add a3, a0, a3 ; CHECK-NEXT: vl8re64.v v24, (a3) ; CHECK-NEXT: sltu a3, a4, a5 -; CHECK-NEXT: vl8re64.v v8, (a2) -; CHECK-NEXT: addi a2, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a2) # Unknown-size Folded Spill -; CHECK-NEXT: vl8re64.v v0, (a0) ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a3, a3, a5 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v24, v16, v8 +; CHECK-NEXT: vfmadd.vv v24, v16, v0 +; CHECK-NEXT: vl8re64.v v0, (a2) +; CHECK-NEXT: vl8re64.v v16, (a0) ; CHECK-NEXT: bltu a4, a1, .LBB93_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB93_2: -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: li a1, 24 -; CHECK-NEXT: mul a0, a0, a1 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v16, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-NEXT: vfmadd.vv v0, v16, v8 -; CHECK-NEXT: vmv.v.v v8, v0 +; CHECK-NEXT: vfmadd.vv v16, v8, v0 +; CHECK-NEXT: vmv.v.v v8, v16 ; CHECK-NEXT: vmv8r.v v16, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 5 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call @llvm.vp.fmuladd.nxv16f64( %va, %b, %c, splat (i1 true), i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll index 63156e1399293..6f4d2dd626bfb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfptrunc-vp.ll @@ -149,69 +149,68 @@ define @vfptrunc_nxv32f32_nxv32f64( ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; CHECK-NEXT: 
vmv1r.v v7, v0 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a1, a1, 3 ; CHECK-NEXT: add a1, sp, a1 ; CHECK-NEXT: addi a1, a1, 16 +; CHECK-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill +; CHECK-NEXT: addi a1, sp, 16 ; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: srli a5, a1, 2 -; CHECK-NEXT: slli a6, a1, 3 -; CHECK-NEXT: slli a4, a1, 1 -; CHECK-NEXT: vslidedown.vx v16, v0, a5 -; CHECK-NEXT: add a6, a0, a6 -; CHECK-NEXT: sub a5, a2, a4 -; CHECK-NEXT: vl8re64.v v24, (a6) -; CHECK-NEXT: sltu a6, a2, a5 +; CHECK-NEXT: srli a4, a1, 2 +; CHECK-NEXT: slli a5, a1, 3 +; CHECK-NEXT: slli a3, a1, 1 +; CHECK-NEXT: vslidedown.vx v16, v0, a4 +; CHECK-NEXT: add a5, a0, a5 +; CHECK-NEXT: sub a4, a2, a3 +; CHECK-NEXT: vl8re64.v v24, (a5) +; CHECK-NEXT: sltu a5, a2, a4 +; CHECK-NEXT: addi a5, a5, -1 +; CHECK-NEXT: and a4, a5, a4 +; CHECK-NEXT: sub a5, a4, a1 +; CHECK-NEXT: sltu a6, a4, a5 ; CHECK-NEXT: addi a6, a6, -1 -; CHECK-NEXT: and a5, a6, a5 -; CHECK-NEXT: sub a6, a5, a1 -; CHECK-NEXT: sltu a7, a5, a6 -; CHECK-NEXT: addi a7, a7, -1 -; CHECK-NEXT: vl8re64.v v8, (a0) -; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v0, v16, a3 -; CHECK-NEXT: and a0, a7, a6 -; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; CHECK-NEXT: and a6, a6, a5 +; CHECK-NEXT: srli a5, a1, 3 +; CHECK-NEXT: vsetvli a7, zero, e8, mf4, ta, ma +; CHECK-NEXT: vslidedown.vx v0, v16, a5 +; CHECK-NEXT: vsetvli zero, a6, e32, m4, ta, ma ; CHECK-NEXT: vfncvt.f.f.w v20, v24, v0.t -; CHECK-NEXT: bltu a5, a1, .LBB8_2 +; CHECK-NEXT: vl8re64.v v24, (a0) +; CHECK-NEXT: bltu a4, a1, .LBB8_2 ; CHECK-NEXT: # %bb.1: -; CHECK-NEXT: mv a5, a1 +; CHECK-NEXT: mv a4, a1 ; CHECK-NEXT: .LBB8_2: ; CHECK-NEXT: vmv1r.v v0, v16 ; CHECK-NEXT: vsetvli a0, zero, e8, mf4, ta, ma -; CHECK-NEXT: vslidedown.vx v6, v7, a3 -; CHECK-NEXT: vsetvli zero, a5, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v16, v8, v0.t -; CHECK-NEXT: bltu a2, a4, .LBB8_4 +; CHECK-NEXT: vslidedown.vx v6, v7, a5 +; CHECK-NEXT: vsetvli zero, a4, e32, m4, ta, ma +; CHECK-NEXT: vfncvt.f.f.w v16, v24, v0.t +; CHECK-NEXT: bltu a2, a3, .LBB8_4 ; CHECK-NEXT: # %bb.3: -; CHECK-NEXT: mv a2, a4 +; CHECK-NEXT: mv a2, a3 ; CHECK-NEXT: .LBB8_4: ; CHECK-NEXT: sub a0, a2, a1 ; CHECK-NEXT: sltu a3, a2, a0 ; CHECK-NEXT: addi a3, a3, -1 ; CHECK-NEXT: and a0, a3, a0 ; CHECK-NEXT: vmv1r.v v0, v6 -; CHECK-NEXT: addi a3, sp, 16 -; CHECK-NEXT: vl8r.v v8, (a3) # Unknown-size Folded Reload +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: add a3, sp, a3 +; CHECK-NEXT: addi a3, a3, 16 +; CHECK-NEXT: vl8r.v v24, (a3) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v28, v8, v0.t +; CHECK-NEXT: vfncvt.f.f.w v12, v24, v0.t ; CHECK-NEXT: bltu a2, a1, .LBB8_6 ; CHECK-NEXT: # %bb.5: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB8_6: ; CHECK-NEXT: vmv1r.v v0, v7 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: addi a0, sp, 16 +; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload ; CHECK-NEXT: vsetvli zero, a2, e32, m4, ta, ma -; CHECK-NEXT: vfncvt.f.f.w v24, v8, v0.t -; CHECK-NEXT: vmv8r.v v8, v24 +; CHECK-NEXT: vfncvt.f.f.w v8, v24, v0.t ; CHECK-NEXT: csrr a0, vlenb ; CHECK-NEXT: slli a0, a0, 
4 ; CHECK-NEXT: add sp, sp, a0 diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll index 4336b27eb134a..3ace3ccdf0ee4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-vp.ll @@ -175,8 +175,8 @@ define @vfsqrt_vv_nxv32bf16( %va, < ; CHECK-NEXT: sub a3, a0, a1 ; CHECK-NEXT: sltu a4, a0, a3 ; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vslidedown.vx v0, v0, a2 ; CHECK-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v24, v12, v0.t ; CHECK-NEXT: vsetvli zero, zero, e32, m8, ta, ma @@ -210,9 +210,9 @@ define @vfsqrt_vv_nxv32bf16_unmasked( @vfsqrt_vv_nxv32f16( %va, @vfsqrt_vv_nxv32f16_unmasked( %v ; ZVFHMIN-NEXT: sub a3, a0, a1 ; ZVFHMIN-NEXT: sltu a4, a0, a3 ; ZVFHMIN-NEXT: addi a4, a4, -1 -; ZVFHMIN-NEXT: vsetvli a5, zero, e8, mf2, ta, ma -; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: and a3, a4, a3 +; ZVFHMIN-NEXT: vsetvli a4, zero, e8, mf2, ta, ma +; ZVFHMIN-NEXT: vslidedown.vx v0, v16, a2 ; ZVFHMIN-NEXT: vsetvli zero, a3, e16, m4, ta, ma ; ZVFHMIN-NEXT: vfwcvt.f.f.v v16, v12, v0.t ; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m8, ta, ma diff --git a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll index c6ee9e34dc207..8003d8fed58bc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vl-opt-instrs.ll @@ -3109,9 +3109,9 @@ define @vmand_mm( %a, %b, ; NOVLOPT-NEXT: vmand.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmand_mm: @@ -3119,9 +3119,9 @@ define @vmand_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmand.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmand.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3136,9 +3136,9 @@ define @vmnand_mm( %a, %b, ; NOVLOPT-NEXT: vmnand.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmnand_mm: @@ -3146,9 +3146,9 @@ define @vmnand_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmnand.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmnand.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3163,9 +3163,9 @@ define @vmandn_mm( %a, %b, ; NOVLOPT-NEXT: vmandn.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; 
NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmandn_mm: @@ -3173,9 +3173,9 @@ define @vmandn_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmandn.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmandn.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3190,9 +3190,9 @@ define @vmxor_mm( %a, %b, ; NOVLOPT-NEXT: vmxor.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmxor_mm: @@ -3200,9 +3200,9 @@ define @vmxor_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmxor.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmxor.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3217,9 +3217,9 @@ define @vmor_mm( %a, %b, < ; NOVLOPT-NEXT: vmor.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmor_mm: @@ -3227,9 +3227,9 @@ define @vmor_mm( %a, %b, < ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmor.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmor.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3245,9 +3245,9 @@ define @vmnor_mm( %a, %b, ; NOVLOPT-NEXT: vmnor.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmnor_mm: @@ -3255,9 +3255,9 @@ define @vmnor_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmnor.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmnor.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3272,9 +3272,9 @@ define @vmorn_mm( %a, %b, ; NOVLOPT-NEXT: vmorn.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; 
NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmorn_mm: @@ -3282,9 +3282,9 @@ define @vmorn_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmorn.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmorn.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) @@ -3299,9 +3299,9 @@ define @vmxnor_mm( %a, %b, ; NOVLOPT-NEXT: vmxnor.mm v8, v0, v8 ; NOVLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; NOVLOPT-NEXT: vmand.mm v0, v0, v8 -; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; NOVLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; NOVLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; NOVLOPT-NEXT: vmv1r.v v8, v9 ; NOVLOPT-NEXT: ret ; ; VLOPT-LABEL: vmxnor_mm: @@ -3309,9 +3309,9 @@ define @vmxnor_mm( %a, %b, ; VLOPT-NEXT: vsetvli zero, a0, e8, mf8, ta, ma ; VLOPT-NEXT: vmxnor.mm v8, v0, v8 ; VLOPT-NEXT: vmand.mm v0, v0, v8 -; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: vsetvli zero, zero, e32, mf2, tu, mu -; VLOPT-NEXT: vadd.vv v8, v9, v9, v0.t +; VLOPT-NEXT: vadd.vv v9, v9, v9, v0.t +; VLOPT-NEXT: vmv1r.v v8, v9 ; VLOPT-NEXT: ret %1 = call @llvm.riscv.vmxnor.nxv1i1( %a, %b, iXLen -1) %2 = call @llvm.riscv.vmand.nxv1i1( %a, %1, iXLen %vl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll index 3df0763fdc757..1b3dd35910522 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmax-vp.ll @@ -1077,12 +1077,11 @@ define @vmax_vx_nxv32i32_evl_nx16( %va, i ; RV64-LABEL: vmax_vx_nxv32i32_evl_nx16: ; RV64: # %bb.0: ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV64-NEXT: vmax.vx v8, v8, a0, v0.t ; RV64-NEXT: srli a1, a1, 2 ; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v24, v0, a1 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vmax.vx v8, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmax.vx v16, v16, a0, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll index 8147d467be04e..df7f177681f5e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmaxu-vp.ll @@ -1076,12 +1076,11 @@ define @vmaxu_vx_nxv32i32_evl_nx16( %va, ; RV64-LABEL: vmaxu_vx_nxv32i32_evl_nx16: ; RV64: # %bb.0: ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t ; RV64-NEXT: srli a1, a1, 2 ; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v24, v0, a1 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vmaxu.vx v8, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmaxu.vx v16, v16, a0, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll index 614bd4cbde9ec..342c037371b57 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmin-vp.ll @@ -1077,12 +1077,11 @@ define 
@vmin_vx_nxv32i32_evl_nx16( %va, i ; RV64-LABEL: vmin_vx_nxv32i32_evl_nx16: ; RV64: # %bb.0: ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV64-NEXT: vmin.vx v8, v8, a0, v0.t ; RV64-NEXT: srli a1, a1, 2 ; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v24, v0, a1 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vmin.vx v8, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vmin.vx v16, v16, a0, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll index 21160553af59d..6821aa6c7e380 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vminu-vp.ll @@ -1076,12 +1076,11 @@ define @vminu_vx_nxv32i32_evl_nx16( %va, ; RV64-LABEL: vminu_vx_nxv32i32_evl_nx16: ; RV64: # %bb.0: ; RV64-NEXT: csrr a1, vlenb +; RV64-NEXT: vsetvli a2, zero, e32, m8, ta, ma +; RV64-NEXT: vminu.vx v8, v8, a0, v0.t ; RV64-NEXT: srli a1, a1, 2 ; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma -; RV64-NEXT: vslidedown.vx v24, v0, a1 -; RV64-NEXT: vsetvli a1, zero, e32, m8, ta, ma -; RV64-NEXT: vminu.vx v8, v8, a0, v0.t -; RV64-NEXT: vmv1r.v v0, v24 +; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetivli zero, 0, e32, m8, ta, ma ; RV64-NEXT: vminu.vx v16, v16, a0, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll index 6407f39a65e8b..275f96d1d526c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmseq.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmseq.ll @@ -1670,12 +1670,12 @@ define @intrinsic_vmseq_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmseq.vv v10, v8, v11, v0.t ; RV32-NEXT: vmv.v.v v0, v10 @@ -1744,12 +1744,12 @@ define @intrinsic_vmseq_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vmseq.vv v11, v8, v12, v0.t ; RV32-NEXT: vmv1r.v v0, v11 @@ -1818,12 +1818,12 @@ define @intrinsic_vmseq_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmseq_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vmseq.vv v13, v8, v16, v0.t ; RV32-NEXT: vmv1r.v v0, v13 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll index 45e3840f7e673..2c1a525220eea 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsge.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsge.ll @@ -1725,12 +1725,12 @@ define 
@intrinsic_vmsge_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmsle.vv v10, v11, v8, v0.t ; RV32-NEXT: vmv.v.v v0, v10 @@ -1800,12 +1800,12 @@ define @intrinsic_vmsge_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vmsle.vv v11, v12, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v11 @@ -1875,12 +1875,12 @@ define @intrinsic_vmsge_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsge_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vmsle.vv v13, v16, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v13 @@ -2872,12 +2872,12 @@ define @intrinsic_vmsge_maskedoff_mask_vx_nxv2i64_i64( @intrinsic_vmsge_maskedoff_mask_vx_nxv4i64_i64( @intrinsic_vmsgeu_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsgeu_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmsleu.vv v10, v11, v8, v0.t ; RV32-NEXT: vmv.v.v v0, v10 @@ -1761,12 +1761,12 @@ define @intrinsic_vmsgeu_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsgeu_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vmsleu.vv v11, v12, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v11 @@ -1836,12 +1836,12 @@ define @intrinsic_vmsgeu_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsgeu_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vmsleu.vv v13, v16, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v13 @@ -2851,12 +2851,12 @@ define @intrinsic_vmsgeu_maskedoff_mask_vx_nxv2i64_i64( @intrinsic_vmsgeu_maskedoff_mask_vx_nxv4i64_i64( @intrinsic_vmsgt_mask_vx_nxv1i64_i64( ; RV32-LABEL: 
intrinsic_vmsgt_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmslt.vv v10, v11, v8, v0.t ; RV32-NEXT: vmv.v.v v0, v10 @@ -1744,12 +1744,12 @@ define @intrinsic_vmsgt_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsgt_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vmslt.vv v11, v12, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v11 @@ -1818,12 +1818,12 @@ define @intrinsic_vmsgt_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsgt_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vmslt.vv v13, v16, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v13 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll index d57b9cd5bae53..f67d2ed047ae7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsgtu.ll @@ -1670,12 +1670,12 @@ define @intrinsic_vmsgtu_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsgtu_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmsltu.vv v10, v11, v8, v0.t ; RV32-NEXT: vmv.v.v v0, v10 @@ -1744,12 +1744,12 @@ define @intrinsic_vmsgtu_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsgtu_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vmsltu.vv v11, v12, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v11 @@ -1818,12 +1818,12 @@ define @intrinsic_vmsgtu_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsgtu_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vmsltu.vv v13, v16, v8, v0.t ; RV32-NEXT: vmv1r.v v0, v13 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll index 
9653dfd2518d8..6aed4286c3495 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsle.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsle.ll @@ -1670,12 +1670,12 @@ define @intrinsic_vmsle_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsle_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmsle.vv v10, v8, v11, v0.t ; RV32-NEXT: vmv.v.v v0, v10 @@ -1744,12 +1744,12 @@ define @intrinsic_vmsle_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsle_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vmsle.vv v11, v8, v12, v0.t ; RV32-NEXT: vmv1r.v v0, v11 @@ -1818,12 +1818,12 @@ define @intrinsic_vmsle_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsle_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vmsle.vv v13, v8, v16, v0.t ; RV32-NEXT: vmv1r.v v0, v13 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll index 25ecfa65c7c48..d881b12d7c1e8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsleu.ll @@ -1670,12 +1670,12 @@ define @intrinsic_vmsleu_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsleu_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmsleu.vv v10, v8, v11, v0.t ; RV32-NEXT: vmv.v.v v0, v10 @@ -1744,12 +1744,12 @@ define @intrinsic_vmsleu_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsleu_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vmsleu.vv v11, v8, v12, v0.t ; RV32-NEXT: vmv1r.v v0, v11 @@ -1818,12 +1818,12 @@ define @intrinsic_vmsleu_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsleu_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 ; 
RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vmsleu.vv v13, v8, v16, v0.t ; RV32-NEXT: vmv1r.v v0, v13 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll index c17495e3b2119..26c3493dd03ab 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmslt.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmslt.ll @@ -1670,12 +1670,12 @@ define @intrinsic_vmslt_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmslt_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmslt.vv v10, v8, v11, v0.t ; RV32-NEXT: vmv.v.v v0, v10 @@ -1744,12 +1744,12 @@ define @intrinsic_vmslt_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmslt_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vmslt.vv v11, v8, v12, v0.t ; RV32-NEXT: vmv1r.v v0, v11 @@ -1818,12 +1818,12 @@ define @intrinsic_vmslt_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmslt_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vmslt.vv v13, v8, v16, v0.t ; RV32-NEXT: vmv1r.v v0, v13 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll index a37a02848365d..2d4795b5b8d30 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsltu.ll @@ -1670,12 +1670,12 @@ define @intrinsic_vmsltu_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsltu_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmsltu.vv v10, v8, v11, v0.t ; RV32-NEXT: vmv.v.v v0, v10 @@ -1744,12 +1744,12 @@ define @intrinsic_vmsltu_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsltu_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vmsltu.vv v11, v8, v12, v0.t ; RV32-NEXT: vmv1r.v v0, v11 @@ -1818,12 +1818,12 @@ define @intrinsic_vmsltu_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsltu_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmv1r.v v13, v0 ; 
RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vmsltu.vv v13, v8, v16, v0.t ; RV32-NEXT: vmv1r.v v0, v13 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll index ed41a18dcc8d3..9d43267f511e3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmsne.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmsne.ll @@ -1670,12 +1670,12 @@ define @intrinsic_vmsne_mask_vx_nxv1i64_i64( ; RV32-LABEL: intrinsic_vmsne_mask_vx_nxv1i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu +; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vlse64.v v11, (a0), zero -; RV32-NEXT: vmv1r.v v10, v0 ; RV32-NEXT: vmv1r.v v0, v9 ; RV32-NEXT: vmsne.vv v10, v8, v11, v0.t ; RV32-NEXT: vmv.v.v v0, v10 @@ -1744,12 +1744,12 @@ define @intrinsic_vmsne_mask_vx_nxv2i64_i64( ; RV32-LABEL: intrinsic_vmsne_mask_vx_nxv2i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu +; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vlse64.v v12, (a0), zero -; RV32-NEXT: vmv1r.v v11, v0 ; RV32-NEXT: vmv1r.v v0, v10 ; RV32-NEXT: vmsne.vv v11, v8, v12, v0.t ; RV32-NEXT: vmv1r.v v0, v11 @@ -1818,12 +1818,12 @@ define @intrinsic_vmsne_mask_vx_nxv4i64_i64( ; RV32-LABEL: intrinsic_vmsne_mask_vx_nxv4i64_i64: ; RV32: # %bb.0: # %entry ; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu +; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: sw a0, 8(sp) ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: addi a0, sp, 8 -; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vlse64.v v16, (a0), zero -; RV32-NEXT: vmv1r.v v13, v0 ; RV32-NEXT: vmv1r.v v0, v12 ; RV32-NEXT: vmsne.vv v13, v8, v16, v0.t ; RV32-NEXT: vmv1r.v v0, v13 diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll index 4629db26ca034..647960a404d4b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv.s.x.ll @@ -248,8 +248,8 @@ define @intrinsic_vmv.s.x_x_nxv1i64( %0, i6 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: vsetvli zero, a2, e64, m1, ta, mu ; RV32-NEXT: vid.v v9 -; RV32-NEXT: vmseq.vi v0, v9, 0 ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vmseq.vi v0, v9, 0 ; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -274,8 +274,8 @@ define @intrinsic_vmv.s.x_x_nxv2i64( %0, i6 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: vsetvli zero, a2, e64, m2, ta, mu ; RV32-NEXT: vid.v v10 -; RV32-NEXT: vmseq.vi v0, v10, 0 ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vmseq.vi v0, v10, 0 ; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -300,8 +300,8 @@ define @intrinsic_vmv.s.x_x_nxv4i64( %0, i6 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: vsetvli zero, a2, e64, m4, ta, mu ; RV32-NEXT: vid.v v12 -; RV32-NEXT: vmseq.vi v0, v12, 0 ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vmseq.vi v0, v12, 0 ; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret @@ -326,8 +326,8 @@ define @intrinsic_vmv.s.x_x_nxv8i64( %0, i6 ; RV32-NEXT: sw a1, 12(sp) ; RV32-NEXT: vsetvli zero, a2, e64, m8, 
ta, mu ; RV32-NEXT: vid.v v16 -; RV32-NEXT: vmseq.vi v0, v16, 0 ; RV32-NEXT: addi a0, sp, 8 +; RV32-NEXT: vmseq.vi v0, v16, 0 ; RV32-NEXT: vlse64.v v8, (a0), zero, v0.t ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vmv0-elimination.ll b/llvm/test/CodeGen/RISCV/rvv/vmv0-elimination.ll index 1820a92a24880..ba885abdce441 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vmv0-elimination.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vmv0-elimination.ll @@ -16,10 +16,10 @@ define @between_inline_asm( %a, asm "vadd.vv $0, $1, $2", "={v0},^vr,^vr"( %a, %b) %x = call @llvm.riscv.vadd.mask( poison, %a, %b, %mask, i64 -1, i64 0) diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll b/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll index a2466c48b0ab7..622f7dfebec9c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-combine-store-reverse.ll @@ -65,9 +65,10 @@ define void @test_different_evl( %val, * ; CHECK-NEXT: vrsub.vx v11, v11, a1 ; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; CHECK-NEXT: vrgatherei16.vv v12, v10, v9 -; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; CHECK-NEXT: vrgather.vv v9, v8, v11 +; CHECK-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmsne.vi v0, v12, 0 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vse32.v v9, (a0), v0.t ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll b/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll index b316f5f878816..1c3f2ed6f81b9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-cttz-elts.ll @@ -169,18 +169,18 @@ define i1 @nxv2i32_cmp_evl( %src, %m, i32 %e ; ; RV64-LABEL: nxv2i32_cmp_evl: ; RV64: # %bb.0: -; RV64-NEXT: slli a1, a0, 32 -; RV64-NEXT: srli a1, a1, 32 -; RV64-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; RV64-NEXT: sext.w a1, a0 +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: vsetvli zero, a0, e32, m1, ta, ma ; RV64-NEXT: vmsne.vi v8, v8, 0, v0.t ; RV64-NEXT: vfirst.m a2, v8, v0.t -; RV64-NEXT: sext.w a0, a0 ; RV64-NEXT: bltz a2, .LBB6_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a1, a2 +; RV64-NEXT: mv a0, a2 ; RV64-NEXT: .LBB6_2: -; RV64-NEXT: sext.w a1, a1 -; RV64-NEXT: xor a0, a1, a0 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: xor a0, a0, a1 ; RV64-NEXT: seqz a0, a0 ; RV64-NEXT: ret %r = call i32 @llvm.vp.cttz.elts.i32.nxv2i32( %src, i1 0, %m, i32 %evl) diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll index e481891dfd52f..2214523c58e5b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll @@ -331,8 +331,9 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( % ; RV32: # %bb.0: ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v8, v0 +; RV32-NEXT: slli a2, a1, 1 ; RV32-NEXT: vmv.v.i v9, 0 -; RV32-NEXT: li a2, -1 +; RV32-NEXT: li a1, -1 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma ; RV32-NEXT: vmv.v.i v10, 0 ; RV32-NEXT: csrr a3, vlenb @@ -340,20 +341,19 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( % ; RV32-NEXT: vmerge.vim v11, v9, 1, v0 ; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: vwaddu.vv v12, v11, v11 -; RV32-NEXT: vwmaccu.vx v12, a2, v11 +; RV32-NEXT: vwmaccu.vx v12, a1, v11 +; RV32-NEXT: add a1, a3, a3 ; RV32-NEXT: vmsne.vi v0, v12, 0 -; RV32-NEXT: vsetvli a2, zero, 
e8, mf2, ta, ma +; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; RV32-NEXT: vslidedown.vx v11, v12, a3 ; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV32-NEXT: vmsne.vi v0, v11, 0 -; RV32-NEXT: add a2, a3, a3 ; RV32-NEXT: vmerge.vim v9, v9, 1, v0 -; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma +; RV32-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; RV32-NEXT: vslideup.vx v10, v9, a3 -; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; RV32-NEXT: vsetvli a1, zero, e8, mf2, ta, ma ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: slli a2, a1, 1 ; RV32-NEXT: vsetvli zero, a2, e32, m2, ta, ma ; RV32-NEXT: vle32.v v10, (a0), v0.t ; RV32-NEXT: li a1, 32 @@ -383,19 +383,19 @@ define i32 @masked_load_store_factor2_v2_shared_mask_extract( % ; RV64-NEXT: srli a3, a3, 2 ; RV64-NEXT: vwaddu.vv v12, v11, v11 ; RV64-NEXT: vwmaccu.vx v12, a2, v11 +; RV64-NEXT: add a1, a3, a3 ; RV64-NEXT: vmsne.vi v0, v12, 0 -; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma +; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; RV64-NEXT: vslidedown.vx v11, v12, a3 ; RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; RV64-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV64-NEXT: vmsne.vi v0, v11, 0 -; RV64-NEXT: add a1, a3, a3 ; RV64-NEXT: vmerge.vim v9, v9, 1, v0 ; RV64-NEXT: vsetvli zero, a1, e8, mf2, ta, ma ; RV64-NEXT: vslideup.vx v10, v9, a3 -; RV64-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: srli a1, a4, 32 +; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; RV64-NEXT: vmsne.vi v0, v10, 0 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV64-NEXT: vle32.v v10, (a0), v0.t ; RV64-NEXT: li a1, 32 @@ -676,6 +676,7 @@ define {, } @not_same_mask( ; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma ; RV32-NEXT: vmv1r.v v9, v0 ; RV32-NEXT: vmv1r.v v0, v8 +; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: vmv.v.i v8, 0 ; RV32-NEXT: li a2, -1 ; RV32-NEXT: vsetvli a3, zero, e8, mf2, ta, ma @@ -688,19 +689,18 @@ define {, } @not_same_mask( ; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: vwaddu.vv v12, v9, v11 ; RV32-NEXT: vwmaccu.vx v12, a2, v11 +; RV32-NEXT: add a2, a3, a3 ; RV32-NEXT: vmsne.vi v0, v12, 0 -; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; RV32-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; RV32-NEXT: vslidedown.vx v9, v12, a3 ; RV32-NEXT: vmerge.vim v10, v10, 1, v0 -; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV32-NEXT: vmsne.vi v0, v9, 0 -; RV32-NEXT: add a2, a3, a3 ; RV32-NEXT: vmerge.vim v8, v8, 1, v0 ; RV32-NEXT: vsetvli zero, a2, e8, mf2, ta, ma ; RV32-NEXT: vslideup.vx v10, v8, a3 ; RV32-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; RV32-NEXT: vmsne.vi v0, v10, 0 -; RV32-NEXT: slli a1, a1, 1 ; RV32-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV32-NEXT: vle32.v v10, (a0), v0.t ; RV32-NEXT: li a0, 32 @@ -725,21 +725,21 @@ define {, } @not_same_mask( ; RV64-NEXT: vmv1r.v v0, v9 ; RV64-NEXT: vmerge.vim v9, v8, 1, v0 ; RV64-NEXT: srli a3, a3, 2 +; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: vwaddu.vv v12, v9, v11 ; RV64-NEXT: vwmaccu.vx v12, a2, v11 +; RV64-NEXT: add a2, a3, a3 ; RV64-NEXT: vmsne.vi v0, v12, 0 -; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma +; RV64-NEXT: vsetvli a4, zero, e8, mf2, ta, ma ; RV64-NEXT: vslidedown.vx v9, v12, a3 ; RV64-NEXT: vmerge.vim v10, v10, 1, v0 -; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma ; RV64-NEXT: vmsne.vi v0, v9, 0 -; RV64-NEXT: add a2, a3, a3 ; 
RV64-NEXT: vmerge.vim v8, v8, 1, v0 ; RV64-NEXT: vsetvli zero, a2, e8, mf2, ta, ma ; RV64-NEXT: vslideup.vx v10, v8, a3 ; RV64-NEXT: vsetvli a2, zero, e8, mf2, ta, ma ; RV64-NEXT: vmsne.vi v0, v10, 0 -; RV64-NEXT: srli a1, a1, 32 ; RV64-NEXT: vsetvli zero, a1, e32, m2, ta, ma ; RV64-NEXT: vle32.v v10, (a0), v0.t ; RV64-NEXT: li a0, 32 diff --git a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll index 1007d1ce649cc..eacc9b329fba3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpgather-sdnode.ll @@ -2435,11 +2435,11 @@ define @vpgather_nxv16f64( %ptrs, @vpgather_nxv16f64( %ptrs, @vpgather_nxv16f64( %ptrs, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: li a3, 8 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; RV32-NEXT: vwmulsu.vx v24, v8, a2 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; RV32-NEXT: vwmulsu.vx v24, v8, a3 ; RV32-NEXT: mv a3, a1 ; RV32-NEXT: bltu a1, a2, .LBB112_2 ; RV32-NEXT: # %bb.1: @@ -2495,9 +2495,9 @@ define @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_nxv16i16_nxv16f64(ptr %base, @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_sext_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: li a3, 8 +; RV32-NEXT: li a2, 8 +; RV32-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; RV32-NEXT: vwmulsu.vx v24, v8, a2 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; RV32-NEXT: vwmulsu.vx v24, v8, a3 ; RV32-NEXT: mv a3, a1 ; RV32-NEXT: bltu a1, a2, .LBB113_2 ; RV32-NEXT: # %bb.1: @@ -2552,9 +2551,9 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: srli a2, a2, 3 ; RV32-NEXT: sltu a1, a1, a3 ; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a2 ; RV32-NEXT: and a1, a1, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret @@ -2564,20 +2563,19 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: vsetvli a2, zero, e64, m8, ta, ma ; RV64-NEXT: vmv1r.v v12, v0 ; RV64-NEXT: vsext.vf4 v16, v10 +; RV64-NEXT: vsext.vf4 v24, v8 ; RV64-NEXT: csrr a2, vlenb +; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: vsll.vi v16, v16, 3 ; RV64-NEXT: sub a3, a1, a2 -; RV64-NEXT: srli a4, a2, 3 -; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a4 ; RV64-NEXT: sltu a4, a1, a3 ; RV64-NEXT: addi a4, a4, -1 ; RV64-NEXT: and a3, a4, a3 +; RV64-NEXT: srli a4, a2, 3 +; RV64-NEXT: vsetvli a5, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a4 ; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vluxei64.v v16, (a0), v16, v0.t -; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma -; RV64-NEXT: vsext.vf4 v24, v8 -; RV64-NEXT: vsll.vi v24, v24, 3 ; RV64-NEXT: bltu a1, a2, .LBB113_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: mv a1, a2 @@ -2595,10 +2593,10 @@ define @vpgather_baseidx_sext_nxv16i16_nxv16f64(ptr %base define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base, %idxs, %m, i32 zeroext %evl) { ; RV32-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV32: # %bb.0: -; RV32-NEXT: li a3, 8 +; RV32-NEXT: li a2, 8 +; 
RV32-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; RV32-NEXT: vwmulu.vx v24, v8, a2 ; RV32-NEXT: csrr a2, vlenb -; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; RV32-NEXT: vwmulu.vx v24, v8, a3 ; RV32-NEXT: mv a3, a1 ; RV32-NEXT: bltu a1, a2, .LBB114_2 ; RV32-NEXT: # %bb.1: @@ -2610,19 +2608,19 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV32-NEXT: srli a2, a2, 3 ; RV32-NEXT: sltu a1, a1, a3 ; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a2 ; RV32-NEXT: and a1, a1, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a2 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret ; ; RV64-LABEL: vpgather_baseidx_zext_nxv16i16_nxv16f64: ; RV64: # %bb.0: -; RV64-NEXT: li a3, 8 +; RV64-NEXT: li a2, 8 +; RV64-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; RV64-NEXT: vwmulu.vx v24, v8, a2 ; RV64-NEXT: csrr a2, vlenb -; RV64-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; RV64-NEXT: vwmulu.vx v24, v8, a3 ; RV64-NEXT: mv a3, a1 ; RV64-NEXT: bltu a1, a2, .LBB114_2 ; RV64-NEXT: # %bb.1: @@ -2634,9 +2632,9 @@ define @vpgather_baseidx_zext_nxv16i16_nxv16f64(ptr %base ; RV64-NEXT: srli a2, a2, 3 ; RV64-NEXT: sltu a1, a1, a3 ; RV64-NEXT: addi a1, a1, -1 -; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a2 ; RV64-NEXT: and a1, a1, a3 +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a2 ; RV64-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV64-NEXT: vluxei32.v v16, (a0), v28, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vpload.ll b/llvm/test/CodeGen/RISCV/rvv/vpload.ll index 0844180e49612..b73659e7ce415 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpload.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpload.ll @@ -527,12 +527,12 @@ define @vpload_nxv16f64(ptr %ptr, %m, ; CHECK-NEXT: csrr a2, vlenb ; CHECK-NEXT: sub a3, a1, a2 ; CHECK-NEXT: slli a4, a2, 3 -; CHECK-NEXT: srli a5, a2, 3 -; CHECK-NEXT: vslidedown.vx v0, v0, a5 ; CHECK-NEXT: sltu a5, a1, a3 ; CHECK-NEXT: addi a5, a5, -1 ; CHECK-NEXT: and a3, a5, a3 +; CHECK-NEXT: srli a5, a2, 3 ; CHECK-NEXT: add a4, a0, a4 +; CHECK-NEXT: vslidedown.vx v0, v0, a5 ; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; CHECK-NEXT: vle64.v v16, (a4), v0.t ; CHECK-NEXT: bltu a1, a2, .LBB44_2 @@ -591,9 +591,9 @@ define @vpload_nxv17f64(ptr %ptr, ptr %out, @llvm.vp.merge.nxv128i8(, @vpmerge_vv_nxv128i8( %va, %vb, %m, i32 zeroext %evl) { ; CHECK-LABEL: vpmerge_vv_nxv128i8: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb ; CHECK-NEXT: vsetvli a1, zero, e8, m8, ta, ma ; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: vmv8r.v v24, v16 -; CHECK-NEXT: addi a1, sp, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: vlm.v v0, (a2) ; CHECK-NEXT: slli a1, a1, 3 @@ -572,26 +564,19 @@ define @vpmerge_vv_nxv128i8( %va, @llvm.vp.merge.nxv128i8( %m, %va, %vb, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll index 2cf6248c17598..9340be684f2cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpscatter-sdnode.ll @@ -2268,9 +2268,9 @@ 
define void @vpscatter_nxv16f64( %val, ; RV32-NEXT: srli a0, a0, 3 ; RV32-NEXT: sltu a1, a1, a2 ; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a0 ; RV32-NEXT: and a1, a1, a2 +; RV32-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a0 ; RV32-NEXT: vsetvli zero, a1, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (zero), v28, v0.t ; RV32-NEXT: ret @@ -2289,21 +2289,21 @@ define void @vpscatter_nxv16f64( %val, ; RV64-NEXT: slli a3, a1, 3 ; RV64-NEXT: add a3, a0, a3 ; RV64-NEXT: vl8re64.v v16, (a3) +; RV64-NEXT: mv a3, a2 ; RV64-NEXT: vl8re64.v v24, (a0) -; RV64-NEXT: mv a0, a2 ; RV64-NEXT: bltu a2, a1, .LBB108_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a1 +; RV64-NEXT: mv a3, a1 ; RV64-NEXT: .LBB108_2: -; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma +; RV64-NEXT: vsetvli zero, a3, e64, m8, ta, ma ; RV64-NEXT: vsoxei64.v v8, (zero), v24, v0.t ; RV64-NEXT: sub a0, a2, a1 ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: sltu a2, a2, a0 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: and a0, a2, a0 +; RV64-NEXT: vsetvli a2, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: addi a1, sp, 16 ; RV64-NEXT: vl8r.v v8, (a1) # Unknown-size Folded Reload ; RV64-NEXT: vsetvli zero, a0, e64, m8, ta, ma @@ -2323,10 +2323,10 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV32-LABEL: vpscatter_baseidx_nxv16i16_nxv16f64: ; RV32: # %bb.0: ; RV32-NEXT: vl4re16.v v4, (a1) -; RV32-NEXT: li a3, 8 +; RV32-NEXT: li a1, 8 +; RV32-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; RV32-NEXT: vwmulsu.vx v24, v4, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; RV32-NEXT: vwmulsu.vx v24, v4, a3 ; RV32-NEXT: mv a3, a2 ; RV32-NEXT: bltu a2, a1, .LBB109_2 ; RV32-NEXT: # %bb.1: @@ -2338,9 +2338,9 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV32-NEXT: srli a1, a1, 3 ; RV32-NEXT: sltu a2, a2, a3 ; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret @@ -2359,14 +2359,14 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vl4re16.v v24, (a1) -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v26 ; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsext.vf4 v16, v24 ; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: mv a3, a2 ; RV64-NEXT: bltu a2, a1, .LBB109_2 ; RV64-NEXT: # %bb.1: @@ -2378,9 +2378,9 @@ define void @vpscatter_baseidx_nxv16i16_nxv16f64( %val, pt ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: sltu a2, a2, a3 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: and a2, a2, a3 +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; 
RV64-NEXT: add a1, sp, a1 @@ -2406,10 +2406,10 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV32-LABEL: vpscatter_baseidx_sext_nxv16i16_nxv16f64: ; RV32: # %bb.0: ; RV32-NEXT: vl4re16.v v4, (a1) -; RV32-NEXT: li a3, 8 +; RV32-NEXT: li a1, 8 +; RV32-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; RV32-NEXT: vwmulsu.vx v24, v4, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; RV32-NEXT: vwmulsu.vx v24, v4, a3 ; RV32-NEXT: mv a3, a2 ; RV32-NEXT: bltu a2, a1, .LBB110_2 ; RV32-NEXT: # %bb.1: @@ -2421,9 +2421,9 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV32-NEXT: srli a1, a1, 3 ; RV32-NEXT: sltu a2, a2, a3 ; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret @@ -2442,14 +2442,14 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: addi a3, a3, 16 ; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill ; RV64-NEXT: vl4re16.v v24, (a1) -; RV64-NEXT: csrr a1, vlenb -; RV64-NEXT: vsetvli a3, zero, e64, m8, ta, ma +; RV64-NEXT: vsetvli a1, zero, e64, m8, ta, ma ; RV64-NEXT: vsext.vf4 v16, v26 ; RV64-NEXT: vsll.vi v16, v16, 3 -; RV64-NEXT: addi a3, sp, 16 -; RV64-NEXT: vs8r.v v16, (a3) # Unknown-size Folded Spill +; RV64-NEXT: addi a1, sp, 16 +; RV64-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill ; RV64-NEXT: vsext.vf4 v16, v24 ; RV64-NEXT: vsll.vi v24, v16, 3 +; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: mv a3, a2 ; RV64-NEXT: bltu a2, a1, .LBB110_2 ; RV64-NEXT: # %bb.1: @@ -2461,9 +2461,9 @@ define void @vpscatter_baseidx_sext_nxv16i16_nxv16f64( %va ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: sltu a2, a2, a3 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: and a2, a2, a3 +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: csrr a1, vlenb ; RV64-NEXT: slli a1, a1, 3 ; RV64-NEXT: add a1, sp, a1 @@ -2490,10 +2490,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV32-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64: ; RV32: # %bb.0: ; RV32-NEXT: vl4re16.v v4, (a1) -; RV32-NEXT: li a3, 8 +; RV32-NEXT: li a1, 8 +; RV32-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; RV32-NEXT: vwmulu.vx v24, v4, a1 ; RV32-NEXT: csrr a1, vlenb -; RV32-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; RV32-NEXT: vwmulu.vx v24, v4, a3 ; RV32-NEXT: mv a3, a2 ; RV32-NEXT: bltu a2, a1, .LBB111_2 ; RV32-NEXT: # %bb.1: @@ -2505,9 +2505,9 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV32-NEXT: srli a1, a1, 3 ; RV32-NEXT: sltu a2, a2, a3 ; RV32-NEXT: addi a2, a2, -1 -; RV32-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: and a2, a2, a3 +; RV32-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV32-NEXT: vslidedown.vx v0, v0, a1 ; RV32-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV32-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV32-NEXT: ret @@ -2515,10 +2515,10 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV64-LABEL: vpscatter_baseidx_zext_nxv16i16_nxv16f64: ; RV64: # %bb.0: ; RV64-NEXT: vl4re16.v v4, (a1) -; RV64-NEXT: li a3, 8 +; RV64-NEXT: li a1, 8 +; RV64-NEXT: vsetvli a3, zero, e16, m4, ta, ma +; RV64-NEXT: vwmulu.vx v24, v4, a1 ; RV64-NEXT: csrr a1, vlenb -; 
RV64-NEXT: vsetvli a4, zero, e16, m4, ta, ma -; RV64-NEXT: vwmulu.vx v24, v4, a3 ; RV64-NEXT: mv a3, a2 ; RV64-NEXT: bltu a2, a1, .LBB111_2 ; RV64-NEXT: # %bb.1: @@ -2530,9 +2530,9 @@ define void @vpscatter_baseidx_zext_nxv16i16_nxv16f64( %va ; RV64-NEXT: srli a1, a1, 3 ; RV64-NEXT: sltu a2, a2, a3 ; RV64-NEXT: addi a2, a2, -1 -; RV64-NEXT: vsetvli a4, zero, e8, mf4, ta, ma -; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: and a2, a2, a3 +; RV64-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; RV64-NEXT: vslidedown.vx v0, v0, a1 ; RV64-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; RV64-NEXT: vsoxei32.v v16, (a0), v28, v0.t ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll index 7e7da529bf3d7..5cb4176a1be19 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vpstore.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vpstore.ll @@ -439,15 +439,15 @@ define void @vpstore_nxv16f64( %val, ptr %ptr, %val, ptr %ptr, %v) { ; CHECK-LABEL: vreduce_fmin_nxv10f16: ; CHECK: # %bb.0: ; CHECK-NEXT: csrr a0, vlenb +; CHECK-NEXT: li a1, 10 +; CHECK-NEXT: srli a0, a0, 3 +; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: lui a1, %hi(.LCPI73_0) ; CHECK-NEXT: addi a1, a1, %lo(.LCPI73_0) ; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma ; CHECK-NEXT: vle16.v v12, (a1) -; CHECK-NEXT: srli a0, a0, 3 -; CHECK-NEXT: li a1, 10 -; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: vsetvli zero, a0, e16, m4, ta, ma ; CHECK-NEXT: vfredmin.vs v12, v8, v12 ; CHECK-NEXT: vfmv.f.s fa0, v12 diff --git a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll index 7b460f2c058f8..df0792a68e05a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vrgatherei16-subreg-liveness.ll @@ -12,11 +12,11 @@ define internal void @foo( %v15, %0, %vs12.i.i.i, %1, %v37) { ; NOSUBREG-LABEL: foo: ; NOSUBREG: # %bb.0: # %loopIR.preheader.i.i -; NOSUBREG-NEXT: vsetvli a0, zero, e8, m1, ta, ma -; NOSUBREG-NEXT: vmv.v.i v9, 0 -; NOSUBREG-NEXT: vsetvli zero, zero, e16, m2, ta, ma +; NOSUBREG-NEXT: vsetvli a0, zero, e16, m2, ta, ma ; NOSUBREG-NEXT: vmv.v.i v14, 0 -; NOSUBREG-NEXT: vmv1r.v v8, v9 +; NOSUBREG-NEXT: vsetvli zero, zero, e8, m1, ta, ma +; NOSUBREG-NEXT: vmv.v.i v9, 0 +; NOSUBREG-NEXT: vmv.v.i v8, 0 ; NOSUBREG-NEXT: vsetivli zero, 4, e8, m1, tu, ma ; NOSUBREG-NEXT: vrgatherei16.vv v8, v9, v14 ; NOSUBREG-NEXT: .LBB0_1: # %loopIR3.i.i @@ -32,11 +32,11 @@ define internal void @foo( %v15, %0, @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y, ; NO_FOLDING: # %bb.0: ; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu ; NO_FOLDING-NEXT: vlm.v v0, (a0) -; NO_FOLDING-NEXT: vlm.v v8, (a2) -; NO_FOLDING-NEXT: vlm.v v9, (a1) +; NO_FOLDING-NEXT: vlm.v v8, (a1) +; NO_FOLDING-NEXT: vlm.v v9, (a2) ; NO_FOLDING-NEXT: vmv.v.i v10, 0 ; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 -; NO_FOLDING-NEXT: vmv.v.v v0, v8 -; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 -; NO_FOLDING-NEXT: vadd.vv v10, v11, v8 -; NO_FOLDING-NEXT: vsub.vv v8, v11, v8 ; NO_FOLDING-NEXT: vmv.v.v v0, v9 +; NO_FOLDING-NEXT: vmerge.vim v9, v10, 1, v0 +; NO_FOLDING-NEXT: vadd.vv v10, v11, v9 +; NO_FOLDING-NEXT: vsub.vv v9, v11, v9 +; NO_FOLDING-NEXT: vmv.v.v v0, v8 ; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t -; NO_FOLDING-NEXT: vor.vv v8, v10, v8 +; NO_FOLDING-NEXT: vor.vv v8, v10, v9 ; NO_FOLDING-NEXT: ret ; ; FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users: ; FOLDING: # %bb.0: ; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu ; FOLDING-NEXT: vlm.v 
v0, (a0) -; FOLDING-NEXT: vlm.v v8, (a2) -; FOLDING-NEXT: vlm.v v9, (a1) +; FOLDING-NEXT: vlm.v v8, (a1) +; FOLDING-NEXT: vlm.v v9, (a2) ; FOLDING-NEXT: vmv.v.i v10, 0 ; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 -; FOLDING-NEXT: vmv.v.v v0, v8 -; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 -; FOLDING-NEXT: vadd.vv v10, v11, v8 -; FOLDING-NEXT: vsub.vv v8, v11, v8 ; FOLDING-NEXT: vmv.v.v v0, v9 +; FOLDING-NEXT: vmerge.vim v9, v10, 1, v0 +; FOLDING-NEXT: vadd.vv v10, v11, v9 +; FOLDING-NEXT: vsub.vv v9, v11, v9 +; FOLDING-NEXT: vmv.v.v v0, v8 ; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t -; FOLDING-NEXT: vor.vv v8, v10, v8 +; FOLDING-NEXT: vor.vv v8, v10, v9 ; FOLDING-NEXT: ret %a = load , ptr %x %b = load , ptr %y @@ -496,34 +496,34 @@ define @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, p ; NO_FOLDING: # %bb.0: ; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; NO_FOLDING-NEXT: vlm.v v0, (a0) -; NO_FOLDING-NEXT: vlm.v v8, (a2) -; NO_FOLDING-NEXT: vlm.v v9, (a1) +; NO_FOLDING-NEXT: vlm.v v8, (a1) +; NO_FOLDING-NEXT: vlm.v v9, (a2) ; NO_FOLDING-NEXT: vmv.v.i v10, 0 ; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 -; NO_FOLDING-NEXT: vmv1r.v v0, v8 -; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 -; NO_FOLDING-NEXT: vadd.vv v10, v11, v8 -; NO_FOLDING-NEXT: vsub.vv v8, v11, v8 ; NO_FOLDING-NEXT: vmv1r.v v0, v9 +; NO_FOLDING-NEXT: vmerge.vim v9, v10, 1, v0 +; NO_FOLDING-NEXT: vadd.vv v10, v11, v9 +; NO_FOLDING-NEXT: vsub.vv v9, v11, v9 +; NO_FOLDING-NEXT: vmv1r.v v0, v8 ; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t -; NO_FOLDING-NEXT: vor.vv v8, v10, v8 +; NO_FOLDING-NEXT: vor.vv v8, v10, v9 ; NO_FOLDING-NEXT: ret ; ; FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users: ; FOLDING: # %bb.0: ; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; FOLDING-NEXT: vlm.v v0, (a0) -; FOLDING-NEXT: vlm.v v8, (a2) -; FOLDING-NEXT: vlm.v v9, (a1) +; FOLDING-NEXT: vlm.v v8, (a1) +; FOLDING-NEXT: vlm.v v9, (a2) ; FOLDING-NEXT: vmv.v.i v10, 0 ; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 -; FOLDING-NEXT: vmv1r.v v0, v8 -; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 -; FOLDING-NEXT: vadd.vv v10, v11, v8 -; FOLDING-NEXT: vsub.vv v8, v11, v8 ; FOLDING-NEXT: vmv1r.v v0, v9 +; FOLDING-NEXT: vmerge.vim v9, v10, 1, v0 +; FOLDING-NEXT: vadd.vv v10, v11, v9 +; FOLDING-NEXT: vsub.vv v9, v11, v9 +; FOLDING-NEXT: vmv1r.v v0, v8 ; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t -; FOLDING-NEXT: vor.vv v8, v10, v8 +; FOLDING-NEXT: vor.vv v8, v10, v9 ; FOLDING-NEXT: ret %a = load , ptr %x %b = load , ptr %y diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll index be2fc6955294d..cc923d8acd245 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-fp.ll @@ -519,10 +519,10 @@ define void @vselect_legalize_regression( %a, @llvm.vp.select.nxv32i32(, @select_nxv32i32( %a, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: select_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a3, vlenb ; 
CHECK-NEXT: slli a4, a3, 3 ; CHECK-NEXT: slli a1, a3, 1 ; CHECK-NEXT: srli a3, a3, 2 ; CHECK-NEXT: add a4, a0, a4 -; CHECK-NEXT: sub a5, a2, a1 -; CHECK-NEXT: vl8re32.v v8, (a4) -; CHECK-NEXT: sltu a4, a2, a5 +; CHECK-NEXT: vslidedown.vx v0, v0, a3 +; CHECK-NEXT: sub a3, a2, a1 +; CHECK-NEXT: vl8re32.v v24, (a4) +; CHECK-NEXT: sltu a4, a2, a3 ; CHECK-NEXT: addi a4, a4, -1 -; CHECK-NEXT: vl8re32.v v0, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v0, v24, a3 -; CHECK-NEXT: and a4, a4, a5 -; CHECK-NEXT: vsetvli zero, a4, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: and a3, a4, a3 +; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: bltu a2, a1, .LBB27_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB27_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call @llvm.vp.select.nxv32i32( %a, %b, %c, i32 %evl) ret %v @@ -410,55 +384,29 @@ declare i32 @llvm.vscale.i32() define @select_evl_nxv32i32( %a, %b, %c) { ; CHECK-LABEL: select_evl_nxv32i32: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a1, zero, e8, mf2, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 ; CHECK-NEXT: slli a2, a1, 1 ; CHECK-NEXT: srli a4, a1, 2 ; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: sub a5, a1, a2 -; CHECK-NEXT: vl8re32.v v8, (a3) -; CHECK-NEXT: sltu a3, a1, a5 +; CHECK-NEXT: vslidedown.vx v0, v0, a4 +; CHECK-NEXT: sub a4, a1, a2 +; CHECK-NEXT: vl8re32.v v24, (a3) +; CHECK-NEXT: sltu a3, a1, a4 ; CHECK-NEXT: addi a3, a3, -1 -; CHECK-NEXT: vl8re32.v v0, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v0, v24, a4 -; CHECK-NEXT: and a3, a3, a5 +; CHECK-NEXT: and a3, a3, a4 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vl8re32.v v24, (a0) ; CHECK-NEXT: bltu a1, a2, .LBB28_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a1, a2 ; CHECK-NEXT: .LBB28_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # 
Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %evl = call i32 @llvm.vscale.i32() %evl0 = mul i32 %evl, 8 @@ -699,54 +647,28 @@ declare @llvm.vp.select.nxv16f64(, @select_nxv16f64( %a, %b, %c, i32 zeroext %evl) { ; CHECK-LABEL: select_nxv16f64: ; CHECK: # %bb.0: -; CHECK-NEXT: addi sp, sp, -16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 4 -; CHECK-NEXT: sub sp, sp, a1 -; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 16 * vlenb -; CHECK-NEXT: csrr a1, vlenb -; CHECK-NEXT: slli a1, a1, 3 -; CHECK-NEXT: add a1, sp, a1 -; CHECK-NEXT: addi a1, a1, 16 -; CHECK-NEXT: vs8r.v v8, (a1) # Unknown-size Folded Spill ; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma -; CHECK-NEXT: vmv1r.v v24, v0 +; CHECK-NEXT: vmv1r.v v7, v0 ; CHECK-NEXT: csrr a1, vlenb ; CHECK-NEXT: slli a3, a1, 3 +; CHECK-NEXT: srli a4, a1, 3 +; CHECK-NEXT: vslidedown.vx v0, v0, a4 ; CHECK-NEXT: sub a4, a2, a1 ; CHECK-NEXT: add a3, a0, a3 -; CHECK-NEXT: sltu a5, a2, a4 -; CHECK-NEXT: vl8re64.v v8, (a3) -; CHECK-NEXT: addi a5, a5, -1 -; CHECK-NEXT: srli a3, a1, 3 -; CHECK-NEXT: vl8re64.v v0, (a0) -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vs8r.v v0, (a0) # Unknown-size Folded Spill -; CHECK-NEXT: vslidedown.vx v0, v24, a3 -; CHECK-NEXT: and a4, a5, a4 -; CHECK-NEXT: vsetvli zero, a4, e64, m8, ta, ma -; CHECK-NEXT: vmerge.vvm v16, v8, v16, v0 +; CHECK-NEXT: vl8re64.v v24, (a3) +; CHECK-NEXT: sltu a3, a2, a4 +; CHECK-NEXT: addi a3, a3, -1 +; CHECK-NEXT: and a3, a3, a4 +; CHECK-NEXT: vsetvli zero, a3, e64, m8, ta, ma +; CHECK-NEXT: vmerge.vvm v16, v24, v16, v0 +; CHECK-NEXT: vl8re64.v v24, (a0) ; CHECK-NEXT: bltu a2, a1, .LBB48_2 ; CHECK-NEXT: # %bb.1: ; CHECK-NEXT: mv a2, a1 ; CHECK-NEXT: .LBB48_2: -; CHECK-NEXT: vmv1r.v v0, v24 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 3 -; CHECK-NEXT: add a0, sp, a0 -; CHECK-NEXT: addi a0, a0, 16 -; CHECK-NEXT: vl8r.v v8, (a0) # Unknown-size Folded Reload -; CHECK-NEXT: addi a0, sp, 16 -; CHECK-NEXT: vl8r.v v24, (a0) # Unknown-size Folded Reload +; CHECK-NEXT: vmv1r.v v0, v7 ; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma ; CHECK-NEXT: vmerge.vvm v8, v24, v8, v0 -; CHECK-NEXT: csrr a0, vlenb -; CHECK-NEXT: slli a0, a0, 4 -; CHECK-NEXT: add sp, sp, a0 -; CHECK-NEXT: .cfi_def_cfa sp, 16 -; CHECK-NEXT: addi sp, sp, 16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: ret %v = call @llvm.vp.select.nxv16f64( %a, %b, %c, i32 %evl) ret %v diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll index 5b577dc0f8df9..f359fbfc63632 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.ll @@ -126,10 +126,10 @@ define @test4(i64 %avl, i8 zeroext %cond, @test5(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { ; CHECK-LABEL: test5: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi a2, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma -; CHECK-NEXT: bnez a2, .LBB4_3 +; CHECK-NEXT: andi a0, a1, 1 +; CHECK-NEXT: bnez a0, .LBB4_3 ; CHECK-NEXT: # %bb.1: # %if.else ; CHECK-NEXT: vfsub.vv v9, v8, v9 ; CHECK-NEXT: andi a1, a1, 2 @@ -234,8 
+234,8 @@ if.end6: ; preds = %if.else5, %if.then4 define @test6(i64 %avl, i8 zeroext %cond, %a, %b) nounwind { ; CHECK-LABEL: test6: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: andi a2, a1, 1 ; CHECK-NEXT: vsetvli zero, a0, e64, m1, ta, ma +; CHECK-NEXT: andi a2, a1, 1 ; CHECK-NEXT: bnez a2, .LBB5_3 ; CHECK-NEXT: # %bb.1: # %if.else ; CHECK-NEXT: vfsub.vv v8, v8, v9 @@ -245,9 +245,9 @@ define @test6(i64 %avl, i8 zeroext %cond, This Inner Loop Header: Depth=1 ; CHECK-NEXT: vle32.v v8, (a1) -; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: slli a4, a3, 2 +; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: sub a0, a0, a3 ; CHECK-NEXT: add a1, a1, a4 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma ; CHECK-NEXT: vfmacc.vf v16, fa0, v8 ; CHECK-NEXT: vse32.v v16, (a2) -; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma ; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: vsetvli a3, a0, e32, m8, ta, ma ; CHECK-NEXT: bnez a3, .LBB8_1 ; CHECK-NEXT: .LBB8_2: # %for.end ; CHECK-NEXT: ret @@ -494,15 +494,15 @@ define void @saxpy_vec_demanded_fields(i64 %n, float %a, ptr nocapture readonly ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vsetvli zero, a3, e32, m8, ta, ma ; CHECK-NEXT: vle32.v v8, (a1) -; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: slli a4, a3, 2 +; CHECK-NEXT: vle32.v v16, (a2) ; CHECK-NEXT: sub a0, a0, a3 ; CHECK-NEXT: add a1, a1, a4 ; CHECK-NEXT: vsetvli zero, zero, e32, m8, tu, ma ; CHECK-NEXT: vfmacc.vf v16, fa0, v8 ; CHECK-NEXT: vse32.v v16, (a2) -; CHECK-NEXT: vsetvli a3, a0, e16, m4, ta, ma ; CHECK-NEXT: add a2, a2, a4 +; CHECK-NEXT: vsetvli a3, a0, e16, m4, ta, ma ; CHECK-NEXT: bnez a3, .LBB9_1 ; CHECK-NEXT: .LBB9_2: # %for.end ; CHECK-NEXT: ret @@ -544,9 +544,9 @@ declare void @llvm.riscv.vse.nxv16f32.i64(, ptr nocapture, define @test_vsetvli_x0_x0(ptr %x, ptr %y, %z, i64 %vl, i1 %cond) nounwind { ; CHECK-LABEL: test_vsetvli_x0_x0: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a3, a3, 1 ; CHECK-NEXT: vsetvli zero, a2, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: andi a3, a3, 1 ; CHECK-NEXT: beqz a3, .LBB10_2 ; CHECK-NEXT: # %bb.1: # %if ; CHECK-NEXT: vle16.v v10, (a1) @@ -583,9 +583,9 @@ declare @llvm.riscv.vadd.nxv2i32(, @test_vsetvli_x0_x0_2(ptr %x, ptr %y, ptr %z, i64 %vl, i1 %cond, i1 %cond2, %w) nounwind { ; CHECK-LABEL: test_vsetvli_x0_x0_2: ; CHECK: # %bb.0: # %entry +; CHECK-NEXT: andi a4, a4, 1 ; CHECK-NEXT: vsetvli zero, a3, e32, m1, ta, ma ; CHECK-NEXT: vle32.v v9, (a0) -; CHECK-NEXT: andi a4, a4, 1 ; CHECK-NEXT: beqz a4, .LBB11_2 ; CHECK-NEXT: # %bb.1: # %if ; CHECK-NEXT: vle16.v v10, (a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll index 8b48dc43eca29..fd690bb31f716 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert.ll @@ -109,13 +109,13 @@ define void @test6(ptr nocapture readonly %A, ptr nocapture %B, i64 %n) { ; CHECK-NEXT: .LBB5_2: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: slli a4, a3, 2 +; CHECK-NEXT: add a3, a3, a2 ; CHECK-NEXT: add a5, a0, a4 +; CHECK-NEXT: add a4, a4, a1 ; CHECK-NEXT: vle32.v v8, (a5) -; CHECK-NEXT: add a3, a3, a2 ; CHECK-NEXT: vmsle.vi v9, v8, -3 ; CHECK-NEXT: vmsgt.vi v10, v8, 2 ; CHECK-NEXT: vmor.mm v0, v9, v10 -; CHECK-NEXT: add a4, a4, a1 ; CHECK-NEXT: vse32.v v8, (a4), v0.t ; CHECK-NEXT: vsetvli a2, a2, e32, m1, ta, ma ; CHECK-NEXT: bnez a2, .LBB5_2 diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll b/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll index 
c3b19b59ec3d6..f658a2c6b24a6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-regression.ll @@ -11,10 +11,9 @@ define i32 @illegal_preserve_vl( %a, %x, pt ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a1, zero, e64, m4, ta, ma ; CHECK-NEXT: vadd.vv v12, v12, v12 -; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-NEXT: vmv.x.s a1, v8 ; CHECK-NEXT: vs4r.v v12, (a0) -; CHECK-NEXT: mv a0, a1 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmv.x.s a0, v8 ; CHECK-NEXT: ret %index = add %x, %x store %index, ptr %y diff --git a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll index fd5bf4ebcede8..de12e23345f08 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vtrunc-vp.ll @@ -290,69 +290,68 @@ define @vtrunc_nxv32i64_nxv32i32( %a, @vwaddu_vv_mask_v8i32( %x, @vwadd_wv_mask_v8i32_nonzero( %x, @i1_zext( %va, %vb ; ; RV64-LABEL: i1_zext: ; RV64: # %bb.0: -; RV64-NEXT: li a1, 42 -; RV64-NEXT: vsetvli a2, zero, e64, m1, ta, mu +; RV64-NEXT: vsetvli a1, zero, e64, m1, ta, mu ; RV64-NEXT: vadd.vi v8, v8, 1, v0.t +; RV64-NEXT: li a1, 42 ; RV64-NEXT: sh a1, 0(a0) ; RV64-NEXT: ret %vc = zext %va to diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll index 04ece9d94880c..dcbb1a88d3731 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsub-mask-sdnode.ll @@ -41,8 +41,8 @@ define @vwsubu_vv_mask_v8i32( %x, @vwsub_wv_mask_v8i32_nonzero( %x, This Loop Header: Depth=1 @@ -102,17 +100,17 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: add s0, a2, t6 ; RV32-NEXT: add s1, a4, t6 ; RV32-NEXT: vl2r.v v8, (s0) -; RV32-NEXT: add s0, a0, t6 +; RV32-NEXT: add s0, t6, t2 ; RV32-NEXT: vl2r.v v10, (s1) -; RV32-NEXT: add s1, t6, t2 -; RV32-NEXT: sltu t6, s1, t6 -; RV32-NEXT: add t5, t5, t6 -; RV32-NEXT: xor t6, s1, t4 +; RV32-NEXT: sltu s1, s0, t6 +; RV32-NEXT: add t5, t5, s1 +; RV32-NEXT: add t6, a0, t6 ; RV32-NEXT: vaaddu.vv v8, v8, v10 -; RV32-NEXT: or s2, t6, t5 -; RV32-NEXT: vs2r.v v8, (s0) -; RV32-NEXT: mv t6, s1 -; RV32-NEXT: bnez s2, .LBB0_13 +; RV32-NEXT: vs2r.v v8, (t6) +; RV32-NEXT: xor t6, s0, t4 +; RV32-NEXT: or s1, t6, t5 +; RV32-NEXT: mv t6, s0 +; RV32-NEXT: bnez s1, .LBB0_13 ; RV32-NEXT: # %bb.14: # %middle.block ; RV32-NEXT: # in Loop: Header=BB0_10 Depth=1 ; RV32-NEXT: beq t4, a6, .LBB0_9 @@ -121,27 +119,25 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV32-NEXT: # => This Inner Loop Header: Depth=2 ; RV32-NEXT: add t5, a2, t4 ; RV32-NEXT: add t6, a4, t4 -; RV32-NEXT: add s0, a0, t4 ; RV32-NEXT: lbu t5, 0(t5) ; RV32-NEXT: lbu t6, 0(t6) -; RV32-NEXT: addi t4, t4, 1 -; RV32-NEXT: seqz s1, t4 -; RV32-NEXT: add t3, t3, s1 ; RV32-NEXT: add t5, t5, t6 -; RV32-NEXT: xor t6, t4, a6 +; RV32-NEXT: add t6, a0, t4 +; RV32-NEXT: addi t4, t4, 1 ; RV32-NEXT: addi t5, t5, 1 ; RV32-NEXT: srli t5, t5, 1 -; RV32-NEXT: or t6, t6, t3 -; RV32-NEXT: sb t5, 0(s0) -; RV32-NEXT: bnez t6, .LBB0_15 +; RV32-NEXT: sb t5, 0(t6) +; RV32-NEXT: seqz t5, t4 +; RV32-NEXT: xor t6, t4, a6 +; RV32-NEXT: add t3, t3, t5 +; RV32-NEXT: or t5, t6, t3 +; RV32-NEXT: bnez t5, .LBB0_15 ; RV32-NEXT: j .LBB0_9 ; RV32-NEXT: .LBB0_16: ; RV32-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32-NEXT: lw s1, 8(sp) # 4-byte Folded Reload -; RV32-NEXT: lw s2, 4(sp) # 4-byte Folded Reload ; RV32-NEXT: .cfi_restore s0 ; 
RV32-NEXT: .cfi_restore s1 -; RV32-NEXT: .cfi_restore s2 ; RV32-NEXT: addi sp, sp, 16 ; RV32-NEXT: .cfi_def_cfa_offset 0 ; RV32-NEXT: .LBB0_17: # %for.cond.cleanup @@ -436,16 +432,16 @@ define void @test1(ptr nocapture noundef writeonly %dst, i32 noundef signext %i_ ; RV64-NEXT: add s0, a2, a6 ; RV64-NEXT: add t6, a4, a6 ; RV64-NEXT: csrr t0, vlenb -; RV64-NEXT: li t2, 32 -; RV64-NEXT: slli t1, t1, 32 -; RV64-NEXT: srli t3, t1, 32 -; RV64-NEXT: mul t1, a1, t3 -; RV64-NEXT: add t5, t5, t1 -; RV64-NEXT: mul t1, a3, t3 -; RV64-NEXT: add s0, s0, t1 +; RV64-NEXT: slli t2, t1, 32 ; RV64-NEXT: slli t1, t0, 1 -; RV64-NEXT: mul t3, a5, t3 -; RV64-NEXT: add t6, t6, t3 +; RV64-NEXT: srli t2, t2, 32 +; RV64-NEXT: mul t3, a1, t2 +; RV64-NEXT: add t5, t5, t3 +; RV64-NEXT: mul t3, a3, t2 +; RV64-NEXT: mul t2, a5, t2 +; RV64-NEXT: add s0, s0, t3 +; RV64-NEXT: add t6, t6, t2 +; RV64-NEXT: li t2, 32 ; RV64-NEXT: mv t4, t1 ; RV64-NEXT: bltu t2, t1, .LBB0_4 ; RV64-NEXT: # %bb.3: # %for.cond1.preheader.us.preheader diff --git a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll index 72f25268109a1..ce344bd7553fe 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vxrm-insert.ll @@ -393,8 +393,8 @@ define void @test10(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, pt ; CHECK-NEXT: vle8.v v8, (a1) ; CHECK-NEXT: vle8.v v9, (a2) ; CHECK-NEXT: vaadd.vv v8, v8, v9 -; CHECK-NEXT: sub a3, a3, a4 ; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: sub a3, a3, a4 ; CHECK-NEXT: bnez a3, .LBB9_2 ; CHECK-NEXT: .LBB9_3: # %for.end ; CHECK-NEXT: ret @@ -432,8 +432,8 @@ define void @test11(ptr nocapture %ptr_dest, ptr nocapture readonly %ptr_op1, pt ; CHECK-NEXT: .LBB10_1: # %for.body ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vaadd.vv v8, v8, v9 -; CHECK-NEXT: sub a3, a3, a4 ; CHECK-NEXT: vse8.v v8, (a0) +; CHECK-NEXT: sub a3, a3, a4 ; CHECK-NEXT: beqz a3, .LBB10_3 ; CHECK-NEXT: # %bb.2: # %for.body ; CHECK-NEXT: # in Loop: Header=BB10_1 Depth=1 diff --git a/llvm/test/CodeGen/RISCV/rvv/wrong-chain-fixed-load.ll b/llvm/test/CodeGen/RISCV/rvv/wrong-chain-fixed-load.ll index 5872a0995feba..f94c5635032a4 100644 --- a/llvm/test/CodeGen/RISCV/rvv/wrong-chain-fixed-load.ll +++ b/llvm/test/CodeGen/RISCV/rvv/wrong-chain-fixed-load.ll @@ -8,10 +8,10 @@ define void @do.memmove() nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lui a0, %hi(c) ; CHECK-NEXT: addi a0, a0, %lo(c) -; CHECK-NEXT: addi a1, a0, 16 -; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; CHECK-NEXT: vle64.v v8, (a1) ; CHECK-NEXT: addi a1, a0, 24 +; CHECK-NEXT: addi a2, a0, 16 +; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; CHECK-NEXT: vle64.v v8, (a2) ; CHECK-NEXT: vse64.v v8, (a1) ; CHECK-NEXT: vle64.v v8, (a0) ; CHECK-NEXT: addi a0, a0, 8 diff --git a/llvm/test/CodeGen/RISCV/scmp.ll b/llvm/test/CodeGen/RISCV/scmp.ll index a212714db53e0..8a0baa67d0293 100644 --- a/llvm/test/CodeGen/RISCV/scmp.ll +++ b/llvm/test/CodeGen/RISCV/scmp.ll @@ -89,15 +89,15 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind { ; RV32I-NEXT: lw a2, 4(a1) ; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: lw a5, 12(a1) -; RV32I-NEXT: lw a6, 12(a0) ; RV32I-NEXT: lw a3, 4(a0) -; RV32I-NEXT: lw a7, 8(a0) -; RV32I-NEXT: beq a6, a5, .LBB4_2 +; RV32I-NEXT: lw a6, 8(a0) +; RV32I-NEXT: lw a7, 12(a0) +; RV32I-NEXT: beq a7, a5, .LBB4_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t2, a6, a5 +; RV32I-NEXT: slt t2, a7, a5 ; RV32I-NEXT: j .LBB4_3 ; RV32I-NEXT: .LBB4_2: -; RV32I-NEXT: sltu t2, a7, a4 +; 
RV32I-NEXT: sltu t2, a6, a4 ; RV32I-NEXT: .LBB4_3: ; RV32I-NEXT: lw a1, 0(a1) ; RV32I-NEXT: lw t0, 0(a0) @@ -108,23 +108,23 @@ define i8 @scmp.8.128(i128 %x, i128 %y) nounwind { ; RV32I-NEXT: .LBB4_5: ; RV32I-NEXT: sltu a0, t0, a1 ; RV32I-NEXT: .LBB4_6: -; RV32I-NEXT: xor t1, a6, a5 -; RV32I-NEXT: xor t3, a7, a4 +; RV32I-NEXT: xor t1, a7, a5 +; RV32I-NEXT: xor t3, a6, a4 ; RV32I-NEXT: or t1, t3, t1 ; RV32I-NEXT: beqz t1, .LBB4_8 ; RV32I-NEXT: # %bb.7: ; RV32I-NEXT: mv a0, t2 ; RV32I-NEXT: .LBB4_8: -; RV32I-NEXT: beq a6, a5, .LBB4_11 +; RV32I-NEXT: beq a7, a5, .LBB4_11 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: slt a4, a5, a6 +; RV32I-NEXT: slt a4, a5, a7 ; RV32I-NEXT: bne a3, a2, .LBB4_12 ; RV32I-NEXT: .LBB4_10: ; RV32I-NEXT: sltu a1, a1, t0 ; RV32I-NEXT: bnez t1, .LBB4_13 ; RV32I-NEXT: j .LBB4_14 ; RV32I-NEXT: .LBB4_11: -; RV32I-NEXT: sltu a4, a4, a7 +; RV32I-NEXT: sltu a4, a4, a6 ; RV32I-NEXT: beq a3, a2, .LBB4_10 ; RV32I-NEXT: .LBB4_12: ; RV32I-NEXT: sltu a1, a2, a3 diff --git a/llvm/test/CodeGen/RISCV/select-and.ll b/llvm/test/CodeGen/RISCV/select-and.ll index f827e840f4a36..01965a2da23f8 100644 --- a/llvm/test/CodeGen/RISCV/select-and.ll +++ b/llvm/test/CodeGen/RISCV/select-and.ll @@ -12,22 +12,22 @@ define signext i32 @select_of_and(i1 zeroext %a, i1 zeroext %b, i32 signext %c, i32 signext %d) nounwind { ; RV32I-LABEL: select_of_and: ; RV32I: # %bb.0: -; RV32I-NEXT: and a1, a0, a1 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: bnez a1, .LBB0_2 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: bnez a0, .LBB0_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: .LBB0_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: select_of_and: ; RV64I: # %bb.0: -; RV64I-NEXT: and a1, a0, a1 -; RV64I-NEXT: mv a0, a2 -; RV64I-NEXT: bnez a1, .LBB0_2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: bnez a0, .LBB0_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: mv a0, a3 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: .LBB0_2: +; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ret ; ; RV64I-CCMOV-LABEL: select_of_and: diff --git a/llvm/test/CodeGen/RISCV/select-bare.ll b/llvm/test/CodeGen/RISCV/select-bare.ll index c9e108a1ca9d0..ab03b1a684730 100644 --- a/llvm/test/CodeGen/RISCV/select-bare.ll +++ b/llvm/test/CodeGen/RISCV/select-bare.ll @@ -7,12 +7,12 @@ define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind { ; RV32I-LABEL: bare_select: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a3, a0, 1 -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: bnez a3, .LBB0_2 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: bnez a0, .LBB0_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: .LBB0_2: +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret ; ; RV64I-CCMOV-LABEL: bare_select: @@ -27,12 +27,12 @@ define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind { define float @bare_select_float(i1 %a, float %b, float %c) nounwind { ; RV32I-LABEL: bare_select_float: ; RV32I: # %bb.0: -; RV32I-NEXT: andi a3, a0, 1 -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: bnez a3, .LBB1_2 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: bnez a0, .LBB1_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a2 +; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: .LBB1_2: +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: ret ; ; RV64I-CCMOV-LABEL: bare_select_float: diff --git a/llvm/test/CodeGen/RISCV/select-cc.ll b/llvm/test/CodeGen/RISCV/select-cc.ll index 1c2a0cf007d11..568fea4df4acc 100644 --- a/llvm/test/CodeGen/RISCV/select-cc.ll +++ b/llvm/test/CodeGen/RISCV/select-cc.ll @@ -163,48 +163,48 @@ define signext i32 @foo(i32 signext %a, ptr %b) nounwind { ; RV64I-CCMOV: # 
%bb.0: ; RV64I-CCMOV-NEXT: lw a2, 0(a1) ; RV64I-CCMOV-NEXT: lw a3, 0(a1) -; RV64I-CCMOV-NEXT: lw a4, 0(a1) -; RV64I-CCMOV-NEXT: lw a5, 0(a1) -; RV64I-CCMOV-NEXT: xor a6, a0, a2 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a6, a2, a0 -; RV64I-CCMOV-NEXT: xor a2, a0, a3 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a2, a0, a3 +; RV64I-CCMOV-NEXT: xor a4, a0, a2 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a2, a0 ; RV64I-CCMOV-NEXT: lw a2, 0(a1) -; RV64I-CCMOV-NEXT: sltu a3, a4, a0 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a3, a0, a4 +; RV64I-CCMOV-NEXT: xor a4, a0, a3 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a0, a3 ; RV64I-CCMOV-NEXT: lw a3, 0(a1) -; RV64I-CCMOV-NEXT: sltu a4, a0, a5 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a5, a0 -; RV64I-CCMOV-NEXT: lw a4, 0(a1) -; RV64I-CCMOV-NEXT: sltu a5, a0, a2 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a5, a0, a2 +; RV64I-CCMOV-NEXT: sltu a4, a2, a0 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a0, a2 ; RV64I-CCMOV-NEXT: lw a2, 0(a1) -; RV64I-CCMOV-NEXT: sltu a5, a3, a0 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a5, a3, a0 +; RV64I-CCMOV-NEXT: sltu a4, a0, a3 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a3, a0 ; RV64I-CCMOV-NEXT: lw a3, 0(a1) -; RV64I-CCMOV-NEXT: sext.w a5, a0 -; RV64I-CCMOV-NEXT: slt a5, a4, a5 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a5, a0, a4 -; RV64I-CCMOV-NEXT: lw a4, 0(a1) -; RV64I-CCMOV-NEXT: sext.w a5, a0 -; RV64I-CCMOV-NEXT: slt a5, a5, a2 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a5, a2, a0 +; RV64I-CCMOV-NEXT: sltu a4, a0, a2 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a0, a2 ; RV64I-CCMOV-NEXT: lw a2, 0(a1) -; RV64I-CCMOV-NEXT: sext.w a5, a0 -; RV64I-CCMOV-NEXT: slt a5, a5, a3 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a5, a0, a3 +; RV64I-CCMOV-NEXT: sltu a4, a3, a0 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a3, a0 +; RV64I-CCMOV-NEXT: lw a3, 0(a1) +; RV64I-CCMOV-NEXT: sext.w a4, a0 +; RV64I-CCMOV-NEXT: slt a4, a2, a4 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a0, a2 +; RV64I-CCMOV-NEXT: lw a2, 0(a1) +; RV64I-CCMOV-NEXT: sext.w a4, a0 +; RV64I-CCMOV-NEXT: slt a4, a4, a3 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a3, a0 +; RV64I-CCMOV-NEXT: lw a3, 0(a1) +; RV64I-CCMOV-NEXT: sext.w a4, a0 +; RV64I-CCMOV-NEXT: slt a4, a4, a2 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a0, a2 +; RV64I-CCMOV-NEXT: lw a2, 0(a1) +; RV64I-CCMOV-NEXT: sext.w a4, a0 +; RV64I-CCMOV-NEXT: slt a4, a3, a4 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a3, a0 +; RV64I-CCMOV-NEXT: lw a3, 0(a1) +; RV64I-CCMOV-NEXT: slti a4, a2, 1 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a0, a2 +; RV64I-CCMOV-NEXT: slti a4, a2, 0 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a3, a0 ; RV64I-CCMOV-NEXT: lw a3, 0(a1) -; RV64I-CCMOV-NEXT: sext.w a5, a0 -; RV64I-CCMOV-NEXT: slt a5, a4, a5 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a5, a4, a0 -; RV64I-CCMOV-NEXT: lw a4, 0(a1) -; RV64I-CCMOV-NEXT: slti a5, a2, 1 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a5, a0, a2 -; RV64I-CCMOV-NEXT: slti a5, a2, 0 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a5, a3, a0 ; RV64I-CCMOV-NEXT: lw a1, 0(a1) -; RV64I-CCMOV-NEXT: slti a3, a4, 1025 -; RV64I-CCMOV-NEXT: mips.ccmov a0, a3, a4, a0 +; RV64I-CCMOV-NEXT: slti a4, a3, 1025 +; RV64I-CCMOV-NEXT: mips.ccmov a0, a4, a3, a0 ; RV64I-CCMOV-NEXT: sltiu a2, a2, 2047 ; RV64I-CCMOV-NEXT: mips.ccmov a0, a2, a1, a0 ; RV64I-CCMOV-NEXT: sext.w a0, a0 diff --git a/llvm/test/CodeGen/RISCV/select-constant-xor.ll b/llvm/test/CodeGen/RISCV/select-constant-xor.ll index 2e26ae78e2dd8..254ff96ef5648 100644 --- a/llvm/test/CodeGen/RISCV/select-constant-xor.ll +++ b/llvm/test/CodeGen/RISCV/select-constant-xor.ll @@ -172,12 +172,12 @@ define i32 @icmpasreq(i32 %input, i32 %a, i32 %b) { ; ; 
RV64-LABEL: icmpasreq: ; RV64: # %bb.0: -; RV64-NEXT: sext.w a3, a0 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bltz a3, .LBB8_2 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: bltz a0, .LBB8_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB8_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %sh = ashr i32 %input, 31 %c = icmp eq i32 %sh, -1 @@ -197,12 +197,12 @@ define i32 @icmpasrne(i32 %input, i32 %a, i32 %b) { ; ; RV64-LABEL: icmpasrne: ; RV64: # %bb.0: -; RV64-NEXT: sext.w a3, a0 -; RV64-NEXT: mv a0, a1 -; RV64-NEXT: bgez a3, .LBB9_2 +; RV64-NEXT: sext.w a0, a0 +; RV64-NEXT: bgez a0, .LBB9_2 ; RV64-NEXT: # %bb.1: -; RV64-NEXT: mv a0, a2 +; RV64-NEXT: mv a1, a2 ; RV64-NEXT: .LBB9_2: +; RV64-NEXT: mv a0, a1 ; RV64-NEXT: ret %sh = ashr i32 %input, 31 %c = icmp ne i32 %sh, -1 diff --git a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll index 005a01bf1000a..3020e61fd6985 100644 --- a/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll +++ b/llvm/test/CodeGen/RISCV/select-optimize-multiple.ll @@ -96,24 +96,24 @@ entry: define i64 @cmov64(i1 %a, i64 %b, i64 %c) nounwind { ; RV32I-LABEL: cmov64: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: andi a5, a0, 1 -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: bnez a5, .LBB2_2 +; RV32I-NEXT: andi a0, a0, 1 +; RV32I-NEXT: bnez a0, .LBB2_2 ; RV32I-NEXT: # %bb.1: # %entry -; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: .LBB2_2: # %entry +; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: cmov64: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: andi a3, a0, 1 -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: bnez a3, .LBB2_2 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: bnez a0, .LBB2_2 ; RV64I-NEXT: # %bb.1: # %entry -; RV64I-NEXT: mv a0, a2 +; RV64I-NEXT: mv a1, a2 ; RV64I-NEXT: .LBB2_2: # %entry +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret entry: %cond = select i1 %a, i64 %b, i64 %c @@ -161,13 +161,13 @@ define i128 @cmov128(i1 %a, i128 %b, i128 %c) nounwind { ; ; RV64I-LABEL: cmov128: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: andi a5, a0, 1 -; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: bnez a5, .LBB3_2 +; RV64I-NEXT: andi a0, a0, 1 +; RV64I-NEXT: bnez a0, .LBB3_2 ; RV64I-NEXT: # %bb.1: # %entry -; RV64I-NEXT: mv a0, a3 +; RV64I-NEXT: mv a1, a3 ; RV64I-NEXT: mv a2, a4 ; RV64I-NEXT: .LBB3_2: # %entry +; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: mv a1, a2 ; RV64I-NEXT: ret entry: @@ -221,9 +221,9 @@ define double @cmovdouble(i1 %a, double %b, double %c) nounwind { ; RV32I-NEXT: sw a3, 8(sp) ; RV32I-NEXT: sw a4, 12(sp) ; RV32I-NEXT: fld fa5, 8(sp) -; RV32I-NEXT: andi a0, a0, 1 ; RV32I-NEXT: sw a1, 8(sp) ; RV32I-NEXT: sw a2, 12(sp) +; RV32I-NEXT: andi a0, a0, 1 ; RV32I-NEXT: beqz a0, .LBB5_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: fld fa5, 8(sp) @@ -301,8 +301,8 @@ entry: define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind { ; RV32I-LABEL: cmovdiffcc: ; RV32I: # %bb.0: # %entry -; RV32I-NEXT: andi a0, a0, 1 ; RV32I-NEXT: andi a1, a1, 1 +; RV32I-NEXT: andi a0, a0, 1 ; RV32I-NEXT: beqz a0, .LBB7_3 ; RV32I-NEXT: # %bb.1: # %entry ; RV32I-NEXT: beqz a1, .LBB7_4 @@ -318,8 +318,8 @@ define i32 @cmovdiffcc(i1 %a, i1 %b, i32 %c, i32 %d, i32 %e, i32 %f) nounwind { ; ; RV64I-LABEL: cmovdiffcc: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: andi a0, a0, 1 ; RV64I-NEXT: andi a1, a1, 1 +; RV64I-NEXT: andi a0, a0, 1 ; RV64I-NEXT: beqz a0, .LBB7_3 ; RV64I-NEXT: # %bb.1: # %entry ; RV64I-NEXT: beqz a1, .LBB7_4 diff --git 
a/llvm/test/CodeGen/RISCV/select-or.ll b/llvm/test/CodeGen/RISCV/select-or.ll index 338c7c06c3ab8..b1ed06ad5b8cf 100644 --- a/llvm/test/CodeGen/RISCV/select-or.ll +++ b/llvm/test/CodeGen/RISCV/select-or.ll @@ -12,22 +12,22 @@ define signext i32 @select_of_or(i1 zeroext %a, i1 zeroext %b, i32 signext %c, i32 signext %d) nounwind { ; RV32I-LABEL: select_of_or: ; RV32I: # %bb.0: -; RV32I-NEXT: or a1, a0, a1 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: bnez a1, .LBB0_2 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: bnez a0, .LBB0_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: mv a2, a3 ; RV32I-NEXT: .LBB0_2: +; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: select_of_or: ; RV64I: # %bb.0: -; RV64I-NEXT: or a1, a0, a1 -; RV64I-NEXT: mv a0, a2 -; RV64I-NEXT: bnez a1, .LBB0_2 +; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: bnez a0, .LBB0_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: mv a0, a3 +; RV64I-NEXT: mv a2, a3 ; RV64I-NEXT: .LBB0_2: +; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: ret ; ; RV64I-CCMOV-LABEL: select_of_or: diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll index e0a16aa05cd00..cb8fddd71e08c 100644 --- a/llvm/test/CodeGen/RISCV/sextw-removal.ll +++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll @@ -269,8 +269,8 @@ define void @test6(i32 signext %arg, i32 signext %arg1) nounwind { ; CHECK-NEXT: .LBB5_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: call baz -; CHECK-NEXT: feq.s a1, fa0, fs0 ; CHECK-NEXT: fcvt.w.s a0, fa0, rtz +; CHECK-NEXT: feq.s a1, fa0, fs0 ; CHECK-NEXT: beqz a1, .LBB5_1 ; CHECK-NEXT: # %bb.2: # %bb7 ; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -289,8 +289,8 @@ define void @test6(i32 signext %arg, i32 signext %arg1) nounwind { ; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 ; NOREMOVAL-NEXT: sext.w a0, a0 ; NOREMOVAL-NEXT: call baz -; NOREMOVAL-NEXT: feq.s a1, fa0, fs0 ; NOREMOVAL-NEXT: fcvt.w.s a0, fa0, rtz +; NOREMOVAL-NEXT: feq.s a1, fa0, fs0 ; NOREMOVAL-NEXT: beqz a1, .LBB5_1 ; NOREMOVAL-NEXT: # %bb.2: # %bb7 ; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -526,8 +526,8 @@ define void @test10(i32 signext %arg, i32 signext %arg1) nounwind { ; CHECK-NEXT: .LBB9_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: call baz -; CHECK-NEXT: feq.s a1, fa0, fs0 ; CHECK-NEXT: fmv.x.w a0, fa0 +; CHECK-NEXT: feq.s a1, fa0, fs0 ; CHECK-NEXT: beqz a1, .LBB9_1 ; CHECK-NEXT: # %bb.2: # %bb7 ; CHECK-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -546,8 +546,8 @@ define void @test10(i32 signext %arg, i32 signext %arg1) nounwind { ; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 ; NOREMOVAL-NEXT: sext.w a0, a0 ; NOREMOVAL-NEXT: call baz -; NOREMOVAL-NEXT: feq.s a1, fa0, fs0 ; NOREMOVAL-NEXT: fmv.x.w a0, fa0 +; NOREMOVAL-NEXT: feq.s a1, fa0, fs0 ; NOREMOVAL-NEXT: beqz a1, .LBB9_1 ; NOREMOVAL-NEXT: # %bb.2: # %bb7 ; NOREMOVAL-NEXT: ld ra, 8(sp) # 8-byte Folded Reload @@ -578,8 +578,8 @@ define signext i32 @test11(i64 %arg1, i64 %arg2, i64 %arg3) { ; CHECK-NEXT: .LBB10_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: andi a0, a0, 1234 -; CHECK-NEXT: addi a2, a2, 1 ; CHECK-NEXT: addw a0, a0, a1 +; CHECK-NEXT: addi a2, a2, 1 ; CHECK-NEXT: bltu a2, a3, .LBB10_1 ; CHECK-NEXT: # %bb.2: # %bb7 ; CHECK-NEXT: ret @@ -591,8 +591,8 @@ define signext i32 @test11(i64 %arg1, i64 %arg2, i64 %arg3) { ; NOREMOVAL-NEXT: .LBB10_1: # %bb2 ; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 ; NOREMOVAL-NEXT: andi a0, a0, 1234 -; NOREMOVAL-NEXT: addi a2, 
a2, 1 ; NOREMOVAL-NEXT: add a0, a0, a1 +; NOREMOVAL-NEXT: addi a2, a2, 1 ; NOREMOVAL-NEXT: bltu a2, a3, .LBB10_1 ; NOREMOVAL-NEXT: # %bb.2: # %bb7 ; NOREMOVAL-NEXT: sext.w a0, a0 @@ -626,8 +626,8 @@ define signext i32 @test12(i64 %arg1, i64 %arg2, i64 %arg3) { ; CHECK-NEXT: mulw a2, a0, a1 ; CHECK-NEXT: addw a0, a0, a2 ; CHECK-NEXT: and a2, a2, a0 -; CHECK-NEXT: addi a3, a3, 1 ; CHECK-NEXT: add a0, a2, a1 +; CHECK-NEXT: addi a3, a3, 1 ; CHECK-NEXT: bltu a3, a4, .LBB11_1 ; CHECK-NEXT: # %bb.2: # %bb7 ; CHECK-NEXT: mv a0, a2 @@ -643,8 +643,8 @@ define signext i32 @test12(i64 %arg1, i64 %arg2, i64 %arg3) { ; NOREMOVAL-NEXT: mul a4, a0, a1 ; NOREMOVAL-NEXT: add a0, a0, a4 ; NOREMOVAL-NEXT: and a4, a4, a0 -; NOREMOVAL-NEXT: addi a2, a2, 1 ; NOREMOVAL-NEXT: add a0, a4, a1 +; NOREMOVAL-NEXT: addi a2, a2, 1 ; NOREMOVAL-NEXT: bltu a2, a3, .LBB11_1 ; NOREMOVAL-NEXT: # %bb.2: # %bb7 ; NOREMOVAL-NEXT: sext.w a0, a4 @@ -678,8 +678,8 @@ define signext i32 @test13(i64 %arg1, i64 %arg2, i64 %arg3) { ; CHECK-NEXT: .LBB12_1: # %bb2 ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: div a0, a0, a1 -; CHECK-NEXT: addi a2, a2, 1 ; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: addi a2, a2, 1 ; CHECK-NEXT: bltu a2, a3, .LBB12_1 ; CHECK-NEXT: # %bb.2: # %bb7 ; CHECK-NEXT: sext.w a0, a0 @@ -692,8 +692,8 @@ define signext i32 @test13(i64 %arg1, i64 %arg2, i64 %arg3) { ; NOREMOVAL-NEXT: .LBB12_1: # %bb2 ; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 ; NOREMOVAL-NEXT: div a0, a0, a1 -; NOREMOVAL-NEXT: addi a2, a2, 1 ; NOREMOVAL-NEXT: add a0, a0, a1 +; NOREMOVAL-NEXT: addi a2, a2, 1 ; NOREMOVAL-NEXT: bltu a2, a3, .LBB12_1 ; NOREMOVAL-NEXT: # %bb.2: # %bb7 ; NOREMOVAL-NEXT: sext.w a0, a0 @@ -989,8 +989,8 @@ define signext i32 @test15(i64 %arg1, i64 %arg2, i64 %arg3, ptr %arg4) { ; CHECK-NEXT: # =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: andi a0, a0, 1234 ; CHECK-NEXT: addw a0, a0, a1 -; CHECK-NEXT: addi a2, a2, 1 ; CHECK-NEXT: sw a0, 0(a3) +; CHECK-NEXT: addi a2, a2, 1 ; CHECK-NEXT: bltu a2, a4, .LBB17_1 ; CHECK-NEXT: # %bb.2: # %bb7 ; CHECK-NEXT: ret @@ -1003,8 +1003,8 @@ define signext i32 @test15(i64 %arg1, i64 %arg2, i64 %arg3, ptr %arg4) { ; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1 ; NOREMOVAL-NEXT: andi a0, a0, 1234 ; NOREMOVAL-NEXT: add a0, a0, a1 -; NOREMOVAL-NEXT: addi a2, a2, 1 ; NOREMOVAL-NEXT: sw a0, 0(a3) +; NOREMOVAL-NEXT: addi a2, a2, 1 ; NOREMOVAL-NEXT: bltu a2, a4, .LBB17_1 ; NOREMOVAL-NEXT: # %bb.2: # %bb7 ; NOREMOVAL-NEXT: sext.w a0, a0 diff --git a/llvm/test/CodeGen/RISCV/shift-amount-mod.ll b/llvm/test/CodeGen/RISCV/shift-amount-mod.ll index 1e893d9baa494..40806c5ecdf48 100644 --- a/llvm/test/CodeGen/RISCV/shift-amount-mod.ll +++ b/llvm/test/CodeGen/RISCV/shift-amount-mod.ll @@ -141,10 +141,9 @@ define i64 @ashr_by_complemented_64(i64 %x) { ; RV32I-NEXT: sub a4, a4, a2 ; RV32I-NEXT: not a2, a4 ; RV32I-NEXT: slli a1, a1, 1 -; RV32I-NEXT: sll a1, a1, a2 -; RV32I-NEXT: or a3, a3, a1 +; RV32I-NEXT: sll a2, a1, a2 ; RV32I-NEXT: mv a1, a0 -; RV32I-NEXT: mv a0, a3 +; RV32I-NEXT: or a0, a3, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ashr_by_complemented_64: @@ -178,25 +177,25 @@ define i32 @shl_by_masked_complemented_32(i32 %x) { define i64 @shl_by_masked_complemented_64(i64 %x) { ; RV32I-LABEL: shl_by_masked_complemented_64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 63 -; RV32I-NEXT: sub a2, a2, a0 -; RV32I-NEXT: andi a4, a2, 63 -; RV32I-NEXT: addi a2, a4, -32 -; RV32I-NEXT: not a3, a0 -; RV32I-NEXT: bltz a2, .LBB7_2 +; RV32I-NEXT: not a2, a0 +; RV32I-NEXT: li a3, 63 +; 
RV32I-NEXT: sub a3, a3, a0 +; RV32I-NEXT: andi a4, a3, 63 +; RV32I-NEXT: addi a3, a4, -32 +; RV32I-NEXT: bltz a3, .LBB7_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: sll a1, a0, a4 ; RV32I-NEXT: j .LBB7_3 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a1, a1, a3 +; RV32I-NEXT: sll a1, a1, a2 ; RV32I-NEXT: not a4, a4 ; RV32I-NEXT: srli a5, a0, 1 ; RV32I-NEXT: srl a4, a5, a4 ; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: sll a0, a0, a3 -; RV32I-NEXT: srai a2, a2, 31 -; RV32I-NEXT: and a0, a2, a0 +; RV32I-NEXT: sll a0, a0, a2 +; RV32I-NEXT: srai a3, a3, 31 +; RV32I-NEXT: and a0, a3, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: shl_by_masked_complemented_64: @@ -213,25 +212,25 @@ define i64 @shl_by_masked_complemented_64(i64 %x) { define i64 @lshr_by_masked_complemented_64(i64 %x) { ; RV32I-LABEL: lshr_by_masked_complemented_64: ; RV32I: # %bb.0: -; RV32I-NEXT: li a2, 63 -; RV32I-NEXT: sub a2, a2, a0 -; RV32I-NEXT: andi a4, a2, 63 -; RV32I-NEXT: addi a2, a4, -32 -; RV32I-NEXT: not a3, a0 -; RV32I-NEXT: bltz a2, .LBB8_2 +; RV32I-NEXT: not a2, a0 +; RV32I-NEXT: li a3, 63 +; RV32I-NEXT: sub a3, a3, a0 +; RV32I-NEXT: andi a4, a3, 63 +; RV32I-NEXT: addi a3, a4, -32 +; RV32I-NEXT: bltz a3, .LBB8_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: srl a0, a1, a4 ; RV32I-NEXT: j .LBB8_3 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: srl a0, a0, a3 +; RV32I-NEXT: srl a0, a0, a2 ; RV32I-NEXT: not a4, a4 ; RV32I-NEXT: slli a5, a1, 1 ; RV32I-NEXT: sll a4, a5, a4 ; RV32I-NEXT: or a0, a0, a4 ; RV32I-NEXT: .LBB8_3: -; RV32I-NEXT: srl a1, a1, a3 -; RV32I-NEXT: srai a2, a2, 31 -; RV32I-NEXT: and a1, a2, a1 +; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: srai a3, a3, 31 +; RV32I-NEXT: and a1, a3, a1 ; RV32I-NEXT: ret ; ; RV64I-LABEL: lshr_by_masked_complemented_64: @@ -250,22 +249,23 @@ define i64 @ashr_by_masked_complemented_64(i64 %x) { ; RV32I: # %bb.0: ; RV32I-NEXT: li a2, 63 ; RV32I-NEXT: sub a2, a2, a0 -; RV32I-NEXT: andi a2, a2, 63 -; RV32I-NEXT: addi a3, a2, -32 -; RV32I-NEXT: bltz a3, .LBB9_2 +; RV32I-NEXT: andi a3, a2, 63 +; RV32I-NEXT: addi a2, a3, -32 +; RV32I-NEXT: bltz a2, .LBB9_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: srai a1, a1, 31 -; RV32I-NEXT: sra a0, a0, a2 +; RV32I-NEXT: srai a2, a1, 31 +; RV32I-NEXT: sra a0, a1, a3 +; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB9_2: -; RV32I-NEXT: not a3, a0 -; RV32I-NEXT: not a2, a2 -; RV32I-NEXT: slli a4, a1, 1 -; RV32I-NEXT: sra a1, a1, a3 -; RV32I-NEXT: srl a0, a0, a3 -; RV32I-NEXT: sll a2, a4, a2 -; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: not a4, a0 +; RV32I-NEXT: not a3, a3 +; RV32I-NEXT: slli a5, a1, 1 +; RV32I-NEXT: sra a2, a1, a4 +; RV32I-NEXT: srl a0, a0, a4 +; RV32I-NEXT: sll a1, a5, a3 +; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: mv a1, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: ashr_by_masked_complemented_64: diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll index 249dabba0cc28..fcf34b5612689 100644 --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -13,8 +13,8 @@ declare i128 @llvm.fshr.i128(i128, i128, i128) define i64 @lshr64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: lshr64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: srl a3, a1, a2 +; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: bltz a4, .LBB0_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a0, a3 @@ -60,13 +60,12 @@ define i64 @ashr64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: ashr64: ; RV32I: # %bb.0: ; RV32I-NEXT: mv a3, a1 -; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: sra a1, a1, a2 +; RV32I-NEXT: addi 
a4, a2, -32 ; RV32I-NEXT: bltz a4, .LBB2_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srai a3, a3, 31 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: srai a1, a3, 31 ; RV32I-NEXT: ret ; RV32I-NEXT: .LBB2_2: ; RV32I-NEXT: srl a0, a0, a2 @@ -105,8 +104,8 @@ define i64 @ashr64_minsize(i64 %a, i64 %b) minsize nounwind { define i64 @shl64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: shl64: ; RV32I: # %bb.0: -; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: sll a3, a0, a2 +; RV32I-NEXT: addi a4, a2, -32 ; RV32I-NEXT: bltz a4, .LBB4_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a1, a3 @@ -197,8 +196,8 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; ; RV64I-LABEL: lshr128: ; RV64I: # %bb.0: -; RV64I-NEXT: addi a4, a2, -64 ; RV64I-NEXT: srl a3, a1, a2 +; RV64I-NEXT: addi a4, a2, -64 ; RV64I-NEXT: bltz a4, .LBB6_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a3 @@ -268,13 +267,12 @@ define i128 @ashr128(i128 %a, i128 %b) nounwind { ; RV64I-LABEL: ashr128: ; RV64I: # %bb.0: ; RV64I-NEXT: mv a3, a1 -; RV64I-NEXT: addi a4, a2, -64 ; RV64I-NEXT: sra a1, a1, a2 +; RV64I-NEXT: addi a4, a2, -64 ; RV64I-NEXT: bltz a4, .LBB7_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: srai a3, a3, 63 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: srai a1, a3, 63 ; RV64I-NEXT: ret ; RV64I-NEXT: .LBB7_2: ; RV64I-NEXT: srl a0, a0, a2 @@ -308,12 +306,12 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; RV32I-NEXT: srli a1, a2, 3 ; RV32I-NEXT: andi a3, a2, 31 ; RV32I-NEXT: andi a1, a1, 12 +; RV32I-NEXT: xori a3, a3, 31 ; RV32I-NEXT: sub a1, a6, a1 ; RV32I-NEXT: lw a4, 0(a1) ; RV32I-NEXT: lw a5, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: xori a3, a3, 31 ; RV32I-NEXT: sll a7, a5, a2 ; RV32I-NEXT: srli t0, a4, 1 ; RV32I-NEXT: sll a1, a1, a2 @@ -336,8 +334,8 @@ define i128 @shl128(i128 %a, i128 %b) nounwind { ; ; RV64I-LABEL: shl128: ; RV64I: # %bb.0: -; RV64I-NEXT: addi a4, a2, -64 ; RV64I-NEXT: sll a3, a0, a2 +; RV64I-NEXT: addi a4, a2, -64 ; RV64I-NEXT: bltz a4, .LBB8_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a1, a3 @@ -394,21 +392,21 @@ define i64 @fshr64_minsize(i64 %a, i64 %b) minsize nounwind { define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-LABEL: fshr128_minsize: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a2, 0(a2) ; RV32I-NEXT: lw t1, 0(a1) ; RV32I-NEXT: lw a7, 4(a1) ; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: andi t2, a2, 64 +; RV32I-NEXT: lw a3, 12(a1) +; RV32I-NEXT: lw a1, 0(a2) +; RV32I-NEXT: andi t2, a1, 64 ; RV32I-NEXT: mv t0, a7 -; RV32I-NEXT: mv a3, t1 +; RV32I-NEXT: mv a2, t1 ; RV32I-NEXT: beqz t2, .LBB10_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv t0, a1 -; RV32I-NEXT: mv a3, a4 +; RV32I-NEXT: mv t0, a3 +; RV32I-NEXT: mv a2, a4 ; RV32I-NEXT: .LBB10_2: -; RV32I-NEXT: andi a6, a2, 32 -; RV32I-NEXT: mv a5, a3 +; RV32I-NEXT: andi a6, a1, 32 +; RV32I-NEXT: mv a5, a2 ; RV32I-NEXT: bnez a6, .LBB10_13 ; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: bnez t2, .LBB10_14 @@ -418,31 +416,31 @@ define i128 @fshr128_minsize(i128 %a, i128 %b) minsize nounwind { ; RV32I-NEXT: mv t0, a4 ; RV32I-NEXT: .LBB10_6: ; RV32I-NEXT: slli t3, t0, 1 -; RV32I-NEXT: not t1, a2 +; RV32I-NEXT: not t1, a1 ; RV32I-NEXT: beqz t2, .LBB10_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: mv a1, a7 +; RV32I-NEXT: mv a3, a7 ; RV32I-NEXT: .LBB10_8: -; RV32I-NEXT: srl a7, a5, a2 +; RV32I-NEXT: srl a7, a5, a1 ; RV32I-NEXT: sll t2, t3, t1 -; RV32I-NEXT: srl t0, t0, a2 +; RV32I-NEXT: srl t0, t0, a1 ; RV32I-NEXT: beqz a6, .LBB10_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a4, a1 
+; RV32I-NEXT: mv a4, a3 ; RV32I-NEXT: .LBB10_10: ; RV32I-NEXT: or a7, t2, a7 ; RV32I-NEXT: slli t2, a4, 1 ; RV32I-NEXT: sll t2, t2, t1 ; RV32I-NEXT: or t0, t2, t0 -; RV32I-NEXT: srl a4, a4, a2 +; RV32I-NEXT: srl a4, a4, a1 ; RV32I-NEXT: beqz a6, .LBB10_12 ; RV32I-NEXT: # %bb.11: -; RV32I-NEXT: mv a1, a3 +; RV32I-NEXT: mv a3, a2 ; RV32I-NEXT: .LBB10_12: -; RV32I-NEXT: slli a3, a1, 1 -; RV32I-NEXT: srl a1, a1, a2 +; RV32I-NEXT: slli a2, a3, 1 +; RV32I-NEXT: srl a1, a3, a1 ; RV32I-NEXT: slli a5, a5, 1 -; RV32I-NEXT: sll a2, a3, t1 +; RV32I-NEXT: sll a2, a2, t1 ; RV32I-NEXT: sll a3, a5, t1 ; RV32I-NEXT: or a2, a2, a4 ; RV32I-NEXT: or a1, a3, a1 diff --git a/llvm/test/CodeGen/RISCV/shl-cttz.ll b/llvm/test/CodeGen/RISCV/shl-cttz.ll index 500673cc29196..f408011b31456 100644 --- a/llvm/test/CodeGen/RISCV/shl-cttz.ll +++ b/llvm/test/CodeGen/RISCV/shl-cttz.ll @@ -415,20 +415,20 @@ define i32 @shl_cttz_multiuse_i32(i32 %x, i32 %y) { ; RV32I-NEXT: .cfi_offset ra, -4 ; RV32I-NEXT: .cfi_offset s0, -8 ; RV32I-NEXT: .cfi_offset s1, -12 -; RV32I-NEXT: neg a2, a1 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: lui a2, 30667 -; RV32I-NEXT: addi a2, a2, 1329 -; RV32I-NEXT: mul a1, a1, a2 -; RV32I-NEXT: srli a1, a1, 27 -; RV32I-NEXT: lui a2, %hi(.LCPI7_0) -; RV32I-NEXT: addi a2, a2, %lo(.LCPI7_0) -; RV32I-NEXT: add a1, a2, a1 -; RV32I-NEXT: lbu s0, 0(a1) -; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: neg a0, a1 +; RV32I-NEXT: and a0, a1, a0 +; RV32I-NEXT: lui a1, 30667 +; RV32I-NEXT: addi a1, a1, 1329 +; RV32I-NEXT: mul a0, a0, a1 +; RV32I-NEXT: srli a0, a0, 27 +; RV32I-NEXT: lui a1, %hi(.LCPI7_0) +; RV32I-NEXT: addi a1, a1, %lo(.LCPI7_0) +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: lbu s1, 0(a0) +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call use32 -; RV32I-NEXT: sll a0, s1, s0 +; RV32I-NEXT: sll a0, s0, s1 ; RV32I-NEXT: lw ra, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 8(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 4(sp) # 4-byte Folded Reload @@ -474,20 +474,20 @@ define i32 @shl_cttz_multiuse_i32(i32 %x, i32 %y) { ; RV64I-NEXT: .cfi_offset ra, -8 ; RV64I-NEXT: .cfi_offset s0, -16 ; RV64I-NEXT: .cfi_offset s1, -24 -; RV64I-NEXT: negw a2, a1 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: lui a2, 30667 -; RV64I-NEXT: addi a2, a2, 1329 -; RV64I-NEXT: mul a1, a1, a2 -; RV64I-NEXT: srliw a1, a1, 27 -; RV64I-NEXT: lui a2, %hi(.LCPI7_0) -; RV64I-NEXT: addi a2, a2, %lo(.LCPI7_0) -; RV64I-NEXT: add a1, a2, a1 -; RV64I-NEXT: lbu s0, 0(a1) -; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: negw a0, a1 +; RV64I-NEXT: and a0, a1, a0 +; RV64I-NEXT: lui a1, 30667 +; RV64I-NEXT: addi a1, a1, 1329 +; RV64I-NEXT: mul a0, a0, a1 +; RV64I-NEXT: srliw a0, a0, 27 +; RV64I-NEXT: lui a1, %hi(.LCPI7_0) +; RV64I-NEXT: addi a1, a1, %lo(.LCPI7_0) +; RV64I-NEXT: add a0, a1, a0 +; RV64I-NEXT: lbu s1, 0(a0) +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call use32 -; RV64I-NEXT: sllw a0, s1, s0 +; RV64I-NEXT: sllw a0, s0, s1 ; RV64I-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 8(sp) # 8-byte Folded Reload @@ -554,8 +554,8 @@ define i64 @shl_cttz_i64(i64 %x, i64 %y) { ; RV32I-NEXT: add a2, a4, a2 ; RV32I-NEXT: lbu a4, 0(a2) ; RV32I-NEXT: .LBB8_3: # %entry -; RV32I-NEXT: addi a3, a4, -32 ; RV32I-NEXT: sll a2, a0, a4 +; RV32I-NEXT: addi a3, a4, -32 ; RV32I-NEXT: bltz a3, .LBB8_5 ; RV32I-NEXT: # %bb.4: # %entry ; RV32I-NEXT: mv a1, a2 @@ -581,8 +581,8 @@ define i64 @shl_cttz_i64(i64 %x, i64 %y) { 
; RV32ZBB-NEXT: .LBB8_2: ; RV32ZBB-NEXT: ctz a4, a2 ; RV32ZBB-NEXT: .LBB8_3: # %entry -; RV32ZBB-NEXT: addi a3, a4, -32 ; RV32ZBB-NEXT: sll a2, a0, a4 +; RV32ZBB-NEXT: addi a3, a4, -32 ; RV32ZBB-NEXT: bltz a3, .LBB8_5 ; RV32ZBB-NEXT: # %bb.4: # %entry ; RV32ZBB-NEXT: mv a1, a2 @@ -642,8 +642,8 @@ define i64 @shl_cttz_constant_i64(i64 %y) { ; RV32I-NEXT: lbu a1, 0(a0) ; RV32I-NEXT: .LBB9_3: # %entry ; RV32I-NEXT: li a0, 4 -; RV32I-NEXT: addi a2, a1, -32 ; RV32I-NEXT: sll a0, a0, a1 +; RV32I-NEXT: addi a2, a1, -32 ; RV32I-NEXT: bltz a2, .LBB9_5 ; RV32I-NEXT: # %bb.4: # %entry ; RV32I-NEXT: mv a1, a0 @@ -668,8 +668,8 @@ define i64 @shl_cttz_constant_i64(i64 %y) { ; RV32ZBB-NEXT: ctz a1, a0 ; RV32ZBB-NEXT: .LBB9_3: # %entry ; RV32ZBB-NEXT: li a0, 4 -; RV32ZBB-NEXT: addi a2, a1, -32 ; RV32ZBB-NEXT: sll a0, a0, a1 +; RV32ZBB-NEXT: addi a2, a1, -32 ; RV32ZBB-NEXT: bltz a2, .LBB9_5 ; RV32ZBB-NEXT: # %bb.4: # %entry ; RV32ZBB-NEXT: mv a1, a0 diff --git a/llvm/test/CodeGen/RISCV/split-offsets.ll b/llvm/test/CodeGen/RISCV/split-offsets.ll index 8f5b044c3b3b8..6d14c0d76a45c 100644 --- a/llvm/test/CodeGen/RISCV/split-offsets.ll +++ b/llvm/test/CodeGen/RISCV/split-offsets.ll @@ -56,10 +56,10 @@ define void @test2(ptr %sp, ptr %t, i32 %n) { ; RV32I-LABEL: test2: ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: li a3, 0 -; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: lui a4, 20 ; RV32I-NEXT: addi a4, a4, -1920 ; RV32I-NEXT: add a1, a1, a4 +; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: add a0, a0, a4 ; RV32I-NEXT: blez a2, .LBB1_2 ; RV32I-NEXT: .LBB1_1: # %while_body @@ -77,8 +77,8 @@ define void @test2(ptr %sp, ptr %t, i32 %n) { ; RV64I-LABEL: test2: ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: li a3, 0 -; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: lui a4, 20 +; RV64I-NEXT: ld a0, 0(a0) ; RV64I-NEXT: addiw a4, a4, -1920 ; RV64I-NEXT: add a1, a1, a4 ; RV64I-NEXT: add a0, a0, a4 diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index 42c87c9660dc9..e3aeae4df2be1 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -304,24 +304,24 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lbu a1, 12(a0) -; RV32-NEXT: lw a2, 8(a0) -; RV32-NEXT: lw a3, 4(a0) ; RV32-NEXT: lw a0, 0(a0) -; RV32-NEXT: slli a4, a1, 30 +; RV32-NEXT: lw a1, 4(s0) +; RV32-NEXT: lw a2, 8(s0) +; RV32-NEXT: lbu a3, 12(s0) +; RV32-NEXT: slli a4, a3, 30 ; RV32-NEXT: srli s1, a2, 2 ; RV32-NEXT: slli a5, a2, 31 ; RV32-NEXT: or s1, s1, a4 -; RV32-NEXT: srli a4, a3, 1 +; RV32-NEXT: srli a4, a1, 1 ; RV32-NEXT: or s2, a4, a5 -; RV32-NEXT: srli a1, a1, 2 +; RV32-NEXT: srli a3, a3, 2 ; RV32-NEXT: srli a2, a2, 1 -; RV32-NEXT: slli a3, a3, 31 ; RV32-NEXT: slli a1, a1, 31 +; RV32-NEXT: slli a3, a3, 31 ; RV32-NEXT: slli a2, a2, 31 -; RV32-NEXT: srai s3, a1, 31 +; RV32-NEXT: srai s3, a3, 31 ; RV32-NEXT: srai s4, a2, 31 -; RV32-NEXT: srai a1, a3, 31 +; RV32-NEXT: srai a1, a1, 31 ; RV32-NEXT: li a2, 6 ; RV32-NEXT: li a3, 0 ; RV32-NEXT: call __moddi3 @@ -383,19 +383,19 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: mv s0, a0 -; RV64-NEXT: lbu a0, 12(a0) -; RV64-NEXT: ld a1, 0(s0) -; RV64-NEXT: lwu a2, 8(s0) -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: srli a3, a1, 2 -; RV64-NEXT: or a0, a2, a0 -; RV64-NEXT: slli 
a2, a2, 62 -; RV64-NEXT: slli a1, a1, 31 -; RV64-NEXT: or a2, a2, a3 -; RV64-NEXT: slli s1, a0, 29 -; RV64-NEXT: srai a0, a2, 31 -; RV64-NEXT: srai s1, s1, 31 -; RV64-NEXT: srai s2, a1, 31 +; RV64-NEXT: ld a0, 0(a0) +; RV64-NEXT: lwu a1, 8(s0) +; RV64-NEXT: lbu a2, 12(s0) +; RV64-NEXT: slli a2, a2, 32 +; RV64-NEXT: srli a3, a0, 2 +; RV64-NEXT: or a2, a1, a2 +; RV64-NEXT: slli a1, a1, 62 +; RV64-NEXT: slli a4, a0, 31 +; RV64-NEXT: or a0, a1, a3 +; RV64-NEXT: slli a2, a2, 29 +; RV64-NEXT: srai a0, a0, 31 +; RV64-NEXT: srai s1, a2, 31 +; RV64-NEXT: srai s2, a4, 31 ; RV64-NEXT: li a1, 7 ; RV64-NEXT: call __moddi3 ; RV64-NEXT: mv s3, a0 @@ -456,24 +456,24 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32M-NEXT: sw s5, 4(sp) # 4-byte Folded Spill ; RV32M-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32M-NEXT: mv s0, a0 -; RV32M-NEXT: lbu a1, 12(a0) -; RV32M-NEXT: lw a2, 8(a0) -; RV32M-NEXT: lw a3, 4(a0) ; RV32M-NEXT: lw a0, 0(a0) -; RV32M-NEXT: slli a4, a1, 30 +; RV32M-NEXT: lw a1, 4(s0) +; RV32M-NEXT: lw a2, 8(s0) +; RV32M-NEXT: lbu a3, 12(s0) +; RV32M-NEXT: slli a4, a3, 30 ; RV32M-NEXT: srli s1, a2, 2 ; RV32M-NEXT: slli a5, a2, 31 ; RV32M-NEXT: or s1, s1, a4 -; RV32M-NEXT: srli a4, a3, 1 +; RV32M-NEXT: srli a4, a1, 1 ; RV32M-NEXT: or s2, a4, a5 -; RV32M-NEXT: srli a1, a1, 2 +; RV32M-NEXT: srli a3, a3, 2 ; RV32M-NEXT: srli a2, a2, 1 -; RV32M-NEXT: slli a3, a3, 31 ; RV32M-NEXT: slli a1, a1, 31 +; RV32M-NEXT: slli a3, a3, 31 ; RV32M-NEXT: slli a2, a2, 31 -; RV32M-NEXT: srai s3, a1, 31 +; RV32M-NEXT: srai s3, a3, 31 ; RV32M-NEXT: srai s4, a2, 31 -; RV32M-NEXT: srai a1, a3, 31 +; RV32M-NEXT: srai a1, a1, 31 ; RV32M-NEXT: li a2, 6 ; RV32M-NEXT: li a3, 0 ; RV32M-NEXT: call __moddi3 @@ -606,26 +606,26 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32MV-NEXT: slli a1, a1, 1 ; RV32MV-NEXT: sub sp, sp, a1 ; RV32MV-NEXT: mv s0, a0 -; RV32MV-NEXT: lw a1, 8(a0) -; RV32MV-NEXT: lbu a2, 12(a0) -; RV32MV-NEXT: lw a3, 4(a0) ; RV32MV-NEXT: lw a0, 0(a0) +; RV32MV-NEXT: lw a1, 4(s0) +; RV32MV-NEXT: lw a2, 8(s0) +; RV32MV-NEXT: lbu a3, 12(s0) ; RV32MV-NEXT: li a4, 1 -; RV32MV-NEXT: slli a5, a2, 30 -; RV32MV-NEXT: srli s1, a1, 2 -; RV32MV-NEXT: slli a6, a1, 31 +; RV32MV-NEXT: slli a5, a3, 30 +; RV32MV-NEXT: srli s1, a2, 2 +; RV32MV-NEXT: slli a6, a2, 31 ; RV32MV-NEXT: or s1, s1, a5 -; RV32MV-NEXT: srli a5, a3, 1 +; RV32MV-NEXT: srli a5, a1, 1 ; RV32MV-NEXT: or s2, a5, a6 ; RV32MV-NEXT: li a5, -1 -; RV32MV-NEXT: srli a2, a2, 2 -; RV32MV-NEXT: srli a1, a1, 1 +; RV32MV-NEXT: srli a3, a3, 2 +; RV32MV-NEXT: srli a2, a2, 1 +; RV32MV-NEXT: slli a1, a1, 31 ; RV32MV-NEXT: slli a3, a3, 31 ; RV32MV-NEXT: slli a2, a2, 31 -; RV32MV-NEXT: slli a6, a1, 31 -; RV32MV-NEXT: srai a1, a3, 31 -; RV32MV-NEXT: srai s3, a2, 31 -; RV32MV-NEXT: srai s4, a6, 31 +; RV32MV-NEXT: srai a1, a1, 31 +; RV32MV-NEXT: srai s3, a3, 31 +; RV32MV-NEXT: srai s4, a2, 31 ; RV32MV-NEXT: sw a5, 16(sp) ; RV32MV-NEXT: sw a4, 20(sp) ; RV32MV-NEXT: li a2, 6 @@ -653,17 +653,18 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV32MV-NEXT: mv a0, s1 ; RV32MV-NEXT: mv a1, s3 ; RV32MV-NEXT: call __moddi3 -; RV32MV-NEXT: addi a2, sp, 16 -; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32MV-NEXT: vlse64.v v8, (a2), zero ; RV32MV-NEXT: addi a2, sp, 32 -; RV32MV-NEXT: vl2r.v v10, (a2) # Unknown-size Folded Reload +; RV32MV-NEXT: vl2r.v v8, (a2) # Unknown-size Folded Reload +; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32MV-NEXT: vslide1down.vx v8, v8, a0 +; RV32MV-NEXT: addi a0, sp, 16 +; RV32MV-NEXT: vslide1down.vx v8, v8, a1 +; RV32MV-NEXT: 
vsetivli zero, 4, e64, m2, ta, ma +; RV32MV-NEXT: vlse64.v v10, (a0), zero ; RV32MV-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; RV32MV-NEXT: vslide1down.vx v10, v10, a0 -; RV32MV-NEXT: vslide1down.vx v10, v10, a1 -; RV32MV-NEXT: vslidedown.vi v10, v10, 2 +; RV32MV-NEXT: vslidedown.vi v8, v8, 2 ; RV32MV-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32MV-NEXT: vand.vv v8, v10, v8 +; RV32MV-NEXT: vand.vv v8, v8, v10 ; RV32MV-NEXT: vsetivli zero, 3, e8, mf2, ta, ma ; RV32MV-NEXT: vmv.v.i v10, 1 ; RV32MV-NEXT: vsetivli zero, 8, e8, mf2, ta, ma diff --git a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll index cf65d4e0cf805..5cb7e1388a08f 100644 --- a/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/srem-vector-lkk.ll @@ -18,30 +18,29 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh a2, 0(a1) -; RV32I-NEXT: lh s0, 4(a1) -; RV32I-NEXT: lh s1, 8(a1) -; RV32I-NEXT: lh s2, 12(a1) -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lh a0, 0(a1) +; RV32I-NEXT: lh s1, 4(a1) +; RV32I-NEXT: lh s2, 8(a1) +; RV32I-NEXT: lh s3, 12(a1) ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, -124 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: li a1, 98 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: li a1, -1003 +; RV32I-NEXT: li a1, 98 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: sh s4, 0(s3) -; RV32I-NEXT: sh s0, 2(s3) -; RV32I-NEXT: sh s1, 4(s3) -; RV32I-NEXT: sh a0, 6(s3) +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: li a1, -1003 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: sh s4, 0(s0) +; RV32I-NEXT: sh s1, 2(s0) +; RV32I-NEXT: sh s2, 4(s0) +; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -110,30 +109,29 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh a2, 0(a1) -; RV64I-NEXT: lh s0, 8(a1) -; RV64I-NEXT: lh s1, 16(a1) -; RV64I-NEXT: lh s2, 24(a1) -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lh a0, 0(a1) +; RV64I-NEXT: lh s1, 8(a1) +; RV64I-NEXT: lh s2, 16(a1) +; RV64I-NEXT: lh s3, 24(a1) ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, -124 -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: li a1, 98 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: li a1, -1003 +; RV64I-NEXT: li a1, 98 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: sh s4, 0(s3) -; RV64I-NEXT: sh s0, 2(s3) -; RV64I-NEXT: sh s1, 4(s3) -; RV64I-NEXT: sh a0, 6(s3) +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: li a1, -1003 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: sh s4, 0(s0) +; RV64I-NEXT: sh s1, 2(s0) +; RV64I-NEXT: sh s2, 4(s0) +; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld 
s1, 24(sp) # 8-byte Folded Reload @@ -206,30 +204,29 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh a2, 0(a1) -; RV32I-NEXT: lh s0, 4(a1) -; RV32I-NEXT: lh s1, 8(a1) -; RV32I-NEXT: lh s2, 12(a1) -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lh a0, 0(a1) +; RV32I-NEXT: lh s1, 4(a1) +; RV32I-NEXT: lh s2, 8(a1) +; RV32I-NEXT: lh s3, 12(a1) ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: sh s4, 0(s3) -; RV32I-NEXT: sh s0, 2(s3) -; RV32I-NEXT: sh s1, 4(s3) -; RV32I-NEXT: sh a0, 6(s3) +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: li a1, 95 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __modsi3 +; RV32I-NEXT: sh s4, 0(s0) +; RV32I-NEXT: sh s1, 2(s0) +; RV32I-NEXT: sh s2, 4(s0) +; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -291,30 +288,29 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh a2, 0(a1) -; RV64I-NEXT: lh s0, 8(a1) -; RV64I-NEXT: lh s1, 16(a1) -; RV64I-NEXT: lh s2, 24(a1) -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lh a0, 0(a1) +; RV64I-NEXT: lh s1, 8(a1) +; RV64I-NEXT: lh s2, 16(a1) +; RV64I-NEXT: lh s3, 24(a1) ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: sh s4, 0(s3) -; RV64I-NEXT: sh s0, 2(s3) -; RV64I-NEXT: sh s1, 4(s3) -; RV64I-NEXT: sh a0, 6(s3) +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: li a1, 95 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __moddi3 +; RV64I-NEXT: sh s4, 0(s0) +; RV64I-NEXT: sh s1, 2(s0) +; RV64I-NEXT: sh s2, 4(s0) +; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -326,20 +322,20 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: fold_srem_vec_2: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a2, %hi(.LCPI1_0) -; RV64IM-NEXT: ld a2, %lo(.LCPI1_0)(a2) -; RV64IM-NEXT: lh a3, 0(a1) -; RV64IM-NEXT: lh a4, 8(a1) -; RV64IM-NEXT: lh a5, 16(a1) +; RV64IM-NEXT: lh a2, 0(a1) +; RV64IM-NEXT: lh a3, 8(a1) +; RV64IM-NEXT: lh a4, 16(a1) ; RV64IM-NEXT: lh a1, 24(a1) -; RV64IM-NEXT: mulh a6, a3, a2 -; RV64IM-NEXT: mulh a7, a4, a2 -; RV64IM-NEXT: mulh t0, a5, a2 -; RV64IM-NEXT: mulh a2, a1, a2 -; RV64IM-NEXT: add a6, a6, a3 -; RV64IM-NEXT: add a7, a7, a4 -; RV64IM-NEXT: add t0, t0, a5 -; RV64IM-NEXT: add a2, a2, a1 +; RV64IM-NEXT: lui a5, %hi(.LCPI1_0) +; RV64IM-NEXT: ld a5, %lo(.LCPI1_0)(a5) +; RV64IM-NEXT: mulh a6, a2, a5 
+; RV64IM-NEXT: mulh a7, a3, a5 +; RV64IM-NEXT: mulh t0, a4, a5 +; RV64IM-NEXT: mulh a5, a1, a5 +; RV64IM-NEXT: add a6, a6, a2 +; RV64IM-NEXT: add a7, a7, a3 +; RV64IM-NEXT: add t0, t0, a4 +; RV64IM-NEXT: add a5, a5, a1 ; RV64IM-NEXT: srli t1, a6, 63 ; RV64IM-NEXT: srli a6, a6, 6 ; RV64IM-NEXT: add a6, a6, t1 @@ -349,21 +345,21 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) nounwind { ; RV64IM-NEXT: srli t1, t0, 63 ; RV64IM-NEXT: srli t0, t0, 6 ; RV64IM-NEXT: add t0, t0, t1 -; RV64IM-NEXT: srli t1, a2, 63 -; RV64IM-NEXT: srli a2, a2, 6 -; RV64IM-NEXT: add a2, a2, t1 +; RV64IM-NEXT: srli t1, a5, 63 +; RV64IM-NEXT: srli a5, a5, 6 +; RV64IM-NEXT: add a5, a5, t1 ; RV64IM-NEXT: li t1, 95 ; RV64IM-NEXT: mul a6, a6, t1 ; RV64IM-NEXT: mul a7, a7, t1 ; RV64IM-NEXT: mul t0, t0, t1 -; RV64IM-NEXT: mul a2, a2, t1 -; RV64IM-NEXT: subw a3, a3, a6 -; RV64IM-NEXT: subw a4, a4, a7 -; RV64IM-NEXT: subw a5, a5, t0 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: sh a3, 0(a0) -; RV64IM-NEXT: sh a4, 2(a0) -; RV64IM-NEXT: sh a5, 4(a0) +; RV64IM-NEXT: mul a5, a5, t1 +; RV64IM-NEXT: subw a2, a2, a6 +; RV64IM-NEXT: subw a3, a3, a7 +; RV64IM-NEXT: subw a4, a4, t0 +; RV64IM-NEXT: subw a1, a1, a5 +; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: sh a3, 2(a0) +; RV64IM-NEXT: sh a4, 4(a0) ; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, @@ -386,11 +382,11 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lh s1, 0(a1) ; RV32I-NEXT: lh s2, 4(a1) ; RV32I-NEXT: lh s3, 8(a1) ; RV32I-NEXT: lh s4, 12(a1) -; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: call __modsi3 @@ -503,11 +499,11 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lh s1, 0(a1) ; RV64I-NEXT: lh s2, 8(a1) ; RV64I-NEXT: lh s3, 16(a1) ; RV64I-NEXT: lh s4, 24(a1) -; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: call __moddi3 @@ -562,49 +558,49 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: combine_srem_sdiv: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lh a2, 16(a1) -; RV64IM-NEXT: lh a3, 24(a1) -; RV64IM-NEXT: lui a4, %hi(.LCPI2_0) -; RV64IM-NEXT: ld a4, %lo(.LCPI2_0)(a4) -; RV64IM-NEXT: lh a5, 0(a1) -; RV64IM-NEXT: lh a1, 8(a1) +; RV64IM-NEXT: lh a2, 0(a1) +; RV64IM-NEXT: lh a3, 8(a1) +; RV64IM-NEXT: lh a4, 16(a1) +; RV64IM-NEXT: lh a1, 24(a1) +; RV64IM-NEXT: lui a5, %hi(.LCPI2_0) ; RV64IM-NEXT: li a6, 95 -; RV64IM-NEXT: mulh a7, a3, a4 -; RV64IM-NEXT: mulh t0, a2, a4 -; RV64IM-NEXT: mulh t1, a1, a4 -; RV64IM-NEXT: mulh a4, a5, a4 -; RV64IM-NEXT: add a7, a7, a3 -; RV64IM-NEXT: add t0, t0, a2 -; RV64IM-NEXT: add t1, t1, a1 -; RV64IM-NEXT: add a4, a4, a5 +; RV64IM-NEXT: ld a5, %lo(.LCPI2_0)(a5) +; RV64IM-NEXT: mulh a7, a1, a5 +; RV64IM-NEXT: mulh t0, a4, a5 +; RV64IM-NEXT: mulh t1, a3, a5 +; RV64IM-NEXT: mulh a5, a2, a5 +; RV64IM-NEXT: add a7, a7, a1 +; RV64IM-NEXT: add t0, t0, a4 +; RV64IM-NEXT: add t1, t1, a3 +; RV64IM-NEXT: add a5, a5, a2 ; RV64IM-NEXT: srli t2, a7, 63 ; RV64IM-NEXT: srai a7, a7, 6 ; RV64IM-NEXT: srli t3, t0, 63 ; RV64IM-NEXT: srai t0, t0, 6 ; RV64IM-NEXT: srli t4, t1, 63 ; RV64IM-NEXT: srai t1, t1, 6 -; RV64IM-NEXT: srli t5, 
a4, 63 -; RV64IM-NEXT: srai a4, a4, 6 +; RV64IM-NEXT: srli t5, a5, 63 +; RV64IM-NEXT: srai a5, a5, 6 ; RV64IM-NEXT: add a7, a7, t2 ; RV64IM-NEXT: add t0, t0, t3 ; RV64IM-NEXT: add t1, t1, t4 -; RV64IM-NEXT: add a4, a4, t5 +; RV64IM-NEXT: add a5, a5, t5 ; RV64IM-NEXT: mul t2, a7, a6 ; RV64IM-NEXT: mul t3, t0, a6 ; RV64IM-NEXT: mul t4, t1, a6 -; RV64IM-NEXT: mul a6, a4, a6 -; RV64IM-NEXT: add a4, a5, a4 -; RV64IM-NEXT: add a1, a1, t1 -; RV64IM-NEXT: add a2, a2, t0 -; RV64IM-NEXT: add a3, a3, a7 -; RV64IM-NEXT: subw a4, a4, a6 -; RV64IM-NEXT: subw a1, a1, t4 -; RV64IM-NEXT: subw a2, a2, t3 -; RV64IM-NEXT: subw a3, a3, t2 -; RV64IM-NEXT: sh a4, 0(a0) -; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh a2, 4(a0) -; RV64IM-NEXT: sh a3, 6(a0) +; RV64IM-NEXT: mul a6, a5, a6 +; RV64IM-NEXT: add a2, a2, a5 +; RV64IM-NEXT: add a3, a3, t1 +; RV64IM-NEXT: add a4, a4, t0 +; RV64IM-NEXT: add a1, a1, a7 +; RV64IM-NEXT: subw a2, a2, a6 +; RV64IM-NEXT: subw a3, a3, t4 +; RV64IM-NEXT: subw a4, a4, t3 +; RV64IM-NEXT: subw a1, a1, t2 +; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: sh a3, 2(a0) +; RV64IM-NEXT: sh a4, 4(a0) +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: ret %1 = srem <4 x i16> %x, %2 = sdiv <4 x i16> %x, @@ -655,36 +651,36 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) nounwind { ; ; RV32IM-LABEL: dont_fold_srem_power_of_two: ; RV32IM: # %bb.0: -; RV32IM-NEXT: lh a2, 4(a1) -; RV32IM-NEXT: lh a3, 8(a1) -; RV32IM-NEXT: lh a4, 12(a1) -; RV32IM-NEXT: lh a1, 0(a1) +; RV32IM-NEXT: lh a2, 0(a1) +; RV32IM-NEXT: lh a3, 4(a1) +; RV32IM-NEXT: lh a4, 8(a1) +; RV32IM-NEXT: lh a1, 12(a1) ; RV32IM-NEXT: lui a5, 706409 ; RV32IM-NEXT: addi a5, a5, 389 -; RV32IM-NEXT: mulh a5, a4, a5 -; RV32IM-NEXT: add a5, a5, a4 +; RV32IM-NEXT: mulh a5, a1, a5 +; RV32IM-NEXT: add a5, a5, a1 ; RV32IM-NEXT: srli a6, a5, 31 ; RV32IM-NEXT: srli a5, a5, 6 ; RV32IM-NEXT: add a5, a5, a6 -; RV32IM-NEXT: srli a6, a1, 26 -; RV32IM-NEXT: add a6, a1, a6 -; RV32IM-NEXT: andi a6, a6, -64 -; RV32IM-NEXT: sub a1, a1, a6 -; RV32IM-NEXT: srli a6, a2, 27 +; RV32IM-NEXT: srli a6, a2, 26 ; RV32IM-NEXT: add a6, a2, a6 -; RV32IM-NEXT: andi a6, a6, -32 +; RV32IM-NEXT: andi a6, a6, -64 ; RV32IM-NEXT: sub a2, a2, a6 -; RV32IM-NEXT: srli a6, a3, 29 +; RV32IM-NEXT: srli a6, a3, 27 ; RV32IM-NEXT: add a6, a3, a6 -; RV32IM-NEXT: andi a6, a6, -8 +; RV32IM-NEXT: andi a6, a6, -32 ; RV32IM-NEXT: sub a3, a3, a6 +; RV32IM-NEXT: srli a6, a4, 29 +; RV32IM-NEXT: add a6, a4, a6 +; RV32IM-NEXT: andi a6, a6, -8 +; RV32IM-NEXT: sub a4, a4, a6 ; RV32IM-NEXT: li a6, 95 ; RV32IM-NEXT: mul a5, a5, a6 -; RV32IM-NEXT: sub a4, a4, a5 -; RV32IM-NEXT: sh a1, 0(a0) -; RV32IM-NEXT: sh a2, 2(a0) -; RV32IM-NEXT: sh a3, 4(a0) -; RV32IM-NEXT: sh a4, 6(a0) +; RV32IM-NEXT: sub a1, a1, a5 +; RV32IM-NEXT: sh a2, 0(a0) +; RV32IM-NEXT: sh a3, 2(a0) +; RV32IM-NEXT: sh a4, 4(a0) +; RV32IM-NEXT: sh a1, 6(a0) ; RV32IM-NEXT: ret ; ; RV64I-LABEL: dont_fold_srem_power_of_two: @@ -773,26 +769,25 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lh a2, 4(a1) -; RV32I-NEXT: lh s0, 8(a1) -; RV32I-NEXT: lh s1, 12(a1) -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lh a0, 4(a1) +; RV32I-NEXT: lh s1, 8(a1) +; RV32I-NEXT: lh s2, 12(a1) ; RV32I-NEXT: li a1, 654 -; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __modsi3 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 23 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, 
s1 ; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a1, a0, 1327 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __modsi3 -; RV32I-NEXT: sh zero, 0(s2) -; RV32I-NEXT: sh s3, 2(s2) -; RV32I-NEXT: sh s0, 4(s2) -; RV32I-NEXT: sh a0, 6(s2) +; RV32I-NEXT: sh zero, 0(s0) +; RV32I-NEXT: sh s3, 2(s0) +; RV32I-NEXT: sh s1, 4(s0) +; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -850,26 +845,25 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lh a2, 8(a1) -; RV64I-NEXT: lh s0, 16(a1) -; RV64I-NEXT: lh s1, 24(a1) -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lh a0, 8(a1) +; RV64I-NEXT: lh s1, 16(a1) +; RV64I-NEXT: lh s2, 24(a1) ; RV64I-NEXT: li a1, 654 -; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 23 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: sh zero, 0(s2) -; RV64I-NEXT: sh s3, 2(s2) -; RV64I-NEXT: sh s0, 4(s2) -; RV64I-NEXT: sh a0, 6(s2) +; RV64I-NEXT: sh zero, 0(s0) +; RV64I-NEXT: sh s3, 2(s0) +; RV64I-NEXT: sh s1, 4(s0) +; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -1036,31 +1030,31 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) nounwind { ; RV64IM-NEXT: lh a2, 8(a1) ; RV64IM-NEXT: lh a3, 16(a1) ; RV64IM-NEXT: lh a1, 24(a1) -; RV64IM-NEXT: lui a4, %hi(.LCPI5_0) -; RV64IM-NEXT: lui a5, %hi(.LCPI5_1) -; RV64IM-NEXT: ld a5, %lo(.LCPI5_1)(a5) -; RV64IM-NEXT: lui a6, 8 -; RV64IM-NEXT: ld a4, %lo(.LCPI5_0)(a4) -; RV64IM-NEXT: srli a7, a2, 49 -; RV64IM-NEXT: mulh a5, a1, a5 -; RV64IM-NEXT: add a7, a2, a7 -; RV64IM-NEXT: and a6, a7, a6 -; RV64IM-NEXT: srli a7, a5, 63 -; RV64IM-NEXT: srli a5, a5, 11 -; RV64IM-NEXT: add a5, a5, a7 -; RV64IM-NEXT: mulh a4, a3, a4 -; RV64IM-NEXT: add a4, a4, a3 -; RV64IM-NEXT: subw a2, a2, a6 +; RV64IM-NEXT: lui a4, %hi(.LCPI5_1) +; RV64IM-NEXT: lui a5, 8 +; RV64IM-NEXT: ld a4, %lo(.LCPI5_1)(a4) +; RV64IM-NEXT: srli a6, a2, 49 +; RV64IM-NEXT: mulh a4, a1, a4 +; RV64IM-NEXT: add a6, a2, a6 +; RV64IM-NEXT: and a5, a6, a5 ; RV64IM-NEXT: srli a6, a4, 63 -; RV64IM-NEXT: srli a4, a4, 4 +; RV64IM-NEXT: srli a4, a4, 11 ; RV64IM-NEXT: add a4, a4, a6 +; RV64IM-NEXT: lui a6, %hi(.LCPI5_0) +; RV64IM-NEXT: ld a6, %lo(.LCPI5_0)(a6) +; RV64IM-NEXT: mulh a6, a3, a6 +; RV64IM-NEXT: add a6, a6, a3 +; RV64IM-NEXT: subw a2, a2, a5 +; RV64IM-NEXT: srli a5, a6, 63 +; RV64IM-NEXT: srli a6, a6, 4 +; RV64IM-NEXT: add a5, a6, a5 ; RV64IM-NEXT: lui a6, 1 ; RV64IM-NEXT: addi a6, a6, 1327 -; RV64IM-NEXT: mul a5, a5, a6 -; RV64IM-NEXT: li a6, 23 ; RV64IM-NEXT: mul a4, a4, a6 -; RV64IM-NEXT: subw a1, a1, a5 -; RV64IM-NEXT: subw a3, a3, a4 +; RV64IM-NEXT: li a6, 23 +; RV64IM-NEXT: mul a5, a5, a6 +; RV64IM-NEXT: subw a1, a1, a4 +; RV64IM-NEXT: subw a3, a3, a5 ; RV64IM-NEXT: sh zero, 0(a0) ; RV64IM-NEXT: sh a2, 2(a0) ; RV64IM-NEXT: sh a3, 4(a0) @@ -1085,18 
+1079,17 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lw s1, 16(a1) ; RV32I-NEXT: lw s2, 20(a1) ; RV32I-NEXT: lw s3, 24(a1) ; RV32I-NEXT: lw s4, 28(a1) -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a0, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw s5, 8(a1) ; RV32I-NEXT: lw s6, 12(a1) -; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a2, 1 -; RV32I-NEXT: mv a0, a3 -; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __moddi3 ; RV32I-NEXT: mv s7, a0 @@ -1155,18 +1148,17 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mv s0, a0 ; RV32IM-NEXT: lw s1, 16(a1) ; RV32IM-NEXT: lw s2, 20(a1) ; RV32IM-NEXT: lw s3, 24(a1) ; RV32IM-NEXT: lw s4, 28(a1) -; RV32IM-NEXT: lw a3, 0(a1) -; RV32IM-NEXT: lw a4, 4(a1) +; RV32IM-NEXT: lw a0, 0(a1) +; RV32IM-NEXT: lw a3, 4(a1) ; RV32IM-NEXT: lw s5, 8(a1) ; RV32IM-NEXT: lw s6, 12(a1) -; RV32IM-NEXT: mv s0, a0 ; RV32IM-NEXT: li a2, 1 -; RV32IM-NEXT: mv a0, a3 -; RV32IM-NEXT: mv a1, a4 +; RV32IM-NEXT: mv a1, a3 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __moddi3 ; RV32IM-NEXT: mv s7, a0 @@ -1220,26 +1212,25 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: ld a2, 8(a1) -; RV64I-NEXT: ld s0, 16(a1) -; RV64I-NEXT: ld s1, 24(a1) -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a0, 8(a1) +; RV64I-NEXT: ld s1, 16(a1) +; RV64I-NEXT: ld s2, 24(a1) ; RV64I-NEXT: li a1, 654 -; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __moddi3 ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 23 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __moddi3 -; RV64I-NEXT: sd zero, 0(s2) -; RV64I-NEXT: sd s3, 8(s2) -; RV64I-NEXT: sd s0, 16(s2) -; RV64I-NEXT: sd a0, 24(s2) +; RV64I-NEXT: sd zero, 0(s0) +; RV64I-NEXT: sd s3, 8(s0) +; RV64I-NEXT: sd s1, 16(s0) +; RV64I-NEXT: sd a0, 24(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/stack-slot-size.ll b/llvm/test/CodeGen/RISCV/stack-slot-size.ll index 71ee6d8160a9d..4691cb6032bcc 100644 --- a/llvm/test/CodeGen/RISCV/stack-slot-size.ll +++ b/llvm/test/CodeGen/RISCV/stack-slot-size.ll @@ -21,11 +21,11 @@ define i32 @caller129() nounwind { ; RV32I-NEXT: li a0, 42 ; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: sw zero, 16(sp) -; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: sw zero, 0(sp) ; RV32I-NEXT: sw zero, 4(sp) ; RV32I-NEXT: sw zero, 8(sp) ; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: call callee129 ; RV32I-NEXT: lw a0, 24(sp) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -38,10 +38,10 @@ define i32 @caller129() nounwind { ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: li a0, 42 ; RV64I-NEXT: sw a0, 36(sp) -; RV64I-NEXT: mv a0, sp ; 
RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: sd zero, 16(sp) +; RV64I-NEXT: mv a0, sp ; RV64I-NEXT: call callee129 ; RV64I-NEXT: lw a0, 36(sp) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -62,11 +62,11 @@ define i32 @caller160() nounwind { ; RV32I-NEXT: li a0, 42 ; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: sw zero, 16(sp) -; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: sw zero, 0(sp) ; RV32I-NEXT: sw zero, 4(sp) ; RV32I-NEXT: sw zero, 8(sp) ; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: call callee160 ; RV32I-NEXT: lw a0, 24(sp) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -79,10 +79,10 @@ define i32 @caller160() nounwind { ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: li a0, 42 ; RV64I-NEXT: sw a0, 36(sp) -; RV64I-NEXT: mv a0, sp ; RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: sd zero, 16(sp) +; RV64I-NEXT: mv a0, sp ; RV64I-NEXT: call callee160 ; RV64I-NEXT: lw a0, 36(sp) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload @@ -104,11 +104,11 @@ define i32 @caller161() nounwind { ; RV32I-NEXT: sw a0, 24(sp) ; RV32I-NEXT: sw zero, 16(sp) ; RV32I-NEXT: sw zero, 20(sp) -; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: sw zero, 0(sp) ; RV32I-NEXT: sw zero, 4(sp) ; RV32I-NEXT: sw zero, 8(sp) ; RV32I-NEXT: sw zero, 12(sp) +; RV32I-NEXT: mv a0, sp ; RV32I-NEXT: call callee161 ; RV32I-NEXT: lw a0, 24(sp) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload @@ -121,10 +121,10 @@ define i32 @caller161() nounwind { ; RV64I-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; RV64I-NEXT: li a0, 42 ; RV64I-NEXT: sw a0, 36(sp) -; RV64I-NEXT: mv a0, sp ; RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: sd zero, 16(sp) +; RV64I-NEXT: mv a0, sp ; RV64I-NEXT: call callee161 ; RV64I-NEXT: lw a0, 36(sp) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/stack-store-check.ll b/llvm/test/CodeGen/RISCV/stack-store-check.ll index cd1aebfea5ce4..27fa059ce5429 100644 --- a/llvm/test/CodeGen/RISCV/stack-store-check.ll +++ b/llvm/test/CodeGen/RISCV/stack-store-check.ll @@ -29,37 +29,37 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: sw s10, 656(sp) # 4-byte Folded Spill ; CHECK-NEXT: sw s11, 652(sp) # 4-byte Folded Spill ; CHECK-NEXT: lui a0, %hi(U) -; CHECK-NEXT: lw s9, %lo(U)(a0) -; CHECK-NEXT: lw s10, %lo(U+4)(a0) -; CHECK-NEXT: lw s11, %lo(U+8)(a0) -; CHECK-NEXT: lw s5, %lo(U+12)(a0) +; CHECK-NEXT: lw s6, %lo(U)(a0) +; CHECK-NEXT: lw s7, %lo(U+4)(a0) +; CHECK-NEXT: lw s8, %lo(U+8)(a0) +; CHECK-NEXT: lw s0, %lo(U+12)(a0) ; CHECK-NEXT: sw zero, 616(sp) ; CHECK-NEXT: sw zero, 620(sp) ; CHECK-NEXT: sw zero, 624(sp) ; CHECK-NEXT: sw zero, 628(sp) +; CHECK-NEXT: sw s6, 600(sp) +; CHECK-NEXT: sw s7, 604(sp) +; CHECK-NEXT: sw s8, 608(sp) +; CHECK-NEXT: sw s0, 612(sp) ; CHECK-NEXT: addi a0, sp, 632 ; CHECK-NEXT: addi a1, sp, 616 ; CHECK-NEXT: addi a2, sp, 600 -; CHECK-NEXT: sw s9, 600(sp) -; CHECK-NEXT: sw s10, 604(sp) -; CHECK-NEXT: sw s11, 608(sp) -; CHECK-NEXT: sw s5, 612(sp) ; CHECK-NEXT: call __subtf3 ; CHECK-NEXT: lw s1, 632(sp) ; CHECK-NEXT: lw s2, 636(sp) ; CHECK-NEXT: lw s3, 640(sp) ; CHECK-NEXT: lw s4, 644(sp) -; CHECK-NEXT: sw s9, 552(sp) -; CHECK-NEXT: sw s10, 556(sp) -; CHECK-NEXT: sw s11, 560(sp) -; CHECK-NEXT: sw s5, 564(sp) -; CHECK-NEXT: addi a0, sp, 584 -; CHECK-NEXT: addi a1, sp, 568 -; CHECK-NEXT: addi a2, sp, 552 +; CHECK-NEXT: sw s6, 552(sp) +; CHECK-NEXT: sw s7, 556(sp) +; CHECK-NEXT: sw s8, 560(sp) +; CHECK-NEXT: sw s0, 564(sp) ; CHECK-NEXT: sw s1, 
568(sp) ; CHECK-NEXT: sw s2, 572(sp) ; CHECK-NEXT: sw s3, 576(sp) ; CHECK-NEXT: sw s4, 580(sp) +; CHECK-NEXT: addi a0, sp, 584 +; CHECK-NEXT: addi a1, sp, 568 +; CHECK-NEXT: addi a2, sp, 552 ; CHECK-NEXT: call __subtf3 ; CHECK-NEXT: lw a0, 584(sp) ; CHECK-NEXT: sw a0, 52(sp) # 4-byte Folded Spill @@ -73,18 +73,22 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: sw zero, 508(sp) ; CHECK-NEXT: sw zero, 512(sp) ; CHECK-NEXT: sw zero, 516(sp) +; CHECK-NEXT: sw s6, 520(sp) +; CHECK-NEXT: sw s7, 524(sp) +; CHECK-NEXT: sw s8, 528(sp) +; CHECK-NEXT: sw s0, 532(sp) ; CHECK-NEXT: addi a0, sp, 536 ; CHECK-NEXT: addi a1, sp, 520 ; CHECK-NEXT: addi a2, sp, 504 -; CHECK-NEXT: sw s9, 520(sp) -; CHECK-NEXT: sw s10, 524(sp) -; CHECK-NEXT: sw s11, 528(sp) -; CHECK-NEXT: sw s5, 532(sp) ; CHECK-NEXT: call __addtf3 -; CHECK-NEXT: lw s0, 536(sp) -; CHECK-NEXT: lw s6, 540(sp) -; CHECK-NEXT: lw s7, 544(sp) -; CHECK-NEXT: lw s8, 548(sp) +; CHECK-NEXT: lw s5, 536(sp) +; CHECK-NEXT: sw s5, 36(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw s9, 540(sp) +; CHECK-NEXT: sw s9, 32(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw s10, 544(sp) +; CHECK-NEXT: sw s10, 28(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw s11, 548(sp) +; CHECK-NEXT: sw s11, 24(sp) # 4-byte Folded Spill ; CHECK-NEXT: lui a0, %hi(Y1) ; CHECK-NEXT: lw a1, %lo(Y1)(a0) ; CHECK-NEXT: sw a1, 20(sp) # 4-byte Folded Spill @@ -98,13 +102,13 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: sw a2, 316(sp) ; CHECK-NEXT: sw a3, 320(sp) ; CHECK-NEXT: sw a0, 324(sp) -; CHECK-NEXT: addi a0, sp, 344 -; CHECK-NEXT: addi a1, sp, 328 -; CHECK-NEXT: addi a2, sp, 312 ; CHECK-NEXT: sw s1, 328(sp) ; CHECK-NEXT: sw s2, 332(sp) ; CHECK-NEXT: sw s3, 336(sp) ; CHECK-NEXT: sw s4, 340(sp) +; CHECK-NEXT: addi a0, sp, 344 +; CHECK-NEXT: addi a1, sp, 328 +; CHECK-NEXT: addi a2, sp, 312 ; CHECK-NEXT: call __multf3 ; CHECK-NEXT: lw a0, 344(sp) ; CHECK-NEXT: sw a0, 68(sp) # 4-byte Folded Spill @@ -114,180 +118,176 @@ define void @main() local_unnamed_addr nounwind { ; CHECK-NEXT: sw a0, 60(sp) # 4-byte Folded Spill ; CHECK-NEXT: lw a0, 356(sp) ; CHECK-NEXT: sw a0, 56(sp) # 4-byte Folded Spill -; CHECK-NEXT: sw s9, 472(sp) -; CHECK-NEXT: sw s10, 476(sp) -; CHECK-NEXT: sw s11, 480(sp) -; CHECK-NEXT: sw s5, 484(sp) +; CHECK-NEXT: sw s6, 472(sp) +; CHECK-NEXT: sw s7, 476(sp) +; CHECK-NEXT: sw s8, 480(sp) +; CHECK-NEXT: sw s0, 484(sp) +; CHECK-NEXT: sw s5, 456(sp) +; CHECK-NEXT: sw s9, 460(sp) +; CHECK-NEXT: sw s10, 464(sp) +; CHECK-NEXT: sw s11, 468(sp) ; CHECK-NEXT: addi a0, sp, 488 ; CHECK-NEXT: addi a1, sp, 472 ; CHECK-NEXT: addi a2, sp, 456 -; CHECK-NEXT: sw s0, 456(sp) -; CHECK-NEXT: sw s6, 460(sp) -; CHECK-NEXT: sw s7, 464(sp) -; CHECK-NEXT: sw s8, 468(sp) ; CHECK-NEXT: call __addtf3 -; CHECK-NEXT: lw a3, 488(sp) -; CHECK-NEXT: lw a4, 492(sp) -; CHECK-NEXT: lw a5, 496(sp) -; CHECK-NEXT: lw a6, 500(sp) +; CHECK-NEXT: lw a0, 488(sp) +; CHECK-NEXT: lw a1, 492(sp) +; CHECK-NEXT: lw a2, 496(sp) +; CHECK-NEXT: lw a3, 500(sp) ; CHECK-NEXT: sw zero, 424(sp) ; CHECK-NEXT: sw zero, 428(sp) ; CHECK-NEXT: sw zero, 432(sp) ; CHECK-NEXT: sw zero, 436(sp) +; CHECK-NEXT: sw a0, 408(sp) +; CHECK-NEXT: sw a1, 412(sp) +; CHECK-NEXT: sw a2, 416(sp) +; CHECK-NEXT: sw a3, 420(sp) ; CHECK-NEXT: addi a0, sp, 440 ; CHECK-NEXT: addi a1, sp, 424 ; CHECK-NEXT: addi a2, sp, 408 -; CHECK-NEXT: sw a3, 408(sp) -; CHECK-NEXT: sw a4, 412(sp) -; CHECK-NEXT: sw a5, 416(sp) -; CHECK-NEXT: sw a6, 420(sp) ; CHECK-NEXT: call __subtf3 -; CHECK-NEXT: lw a0, 448(sp) -; CHECK-NEXT: lw a1, 452(sp) -; 
CHECK-NEXT: lw a2, 440(sp) -; CHECK-NEXT: lw a3, 444(sp) +; CHECK-NEXT: lw a0, 440(sp) +; CHECK-NEXT: lw a1, 444(sp) +; CHECK-NEXT: lw a2, 448(sp) +; CHECK-NEXT: lw a3, 452(sp) ; CHECK-NEXT: lui a4, %hi(X) -; CHECK-NEXT: sw a1, %lo(X+12)(a4) -; CHECK-NEXT: sw a0, %lo(X+8)(a4) -; CHECK-NEXT: sw a3, %lo(X+4)(a4) -; CHECK-NEXT: sw a2, %lo(X)(a4) -; CHECK-NEXT: lw s5, 20(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s5, 216(sp) -; CHECK-NEXT: lw s9, 16(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s9, 220(sp) -; CHECK-NEXT: lw s10, 12(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s10, 224(sp) -; CHECK-NEXT: lw s11, 8(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s11, 228(sp) +; CHECK-NEXT: sw a3, %lo(X+12)(a4) +; CHECK-NEXT: sw a2, %lo(X+8)(a4) +; CHECK-NEXT: sw a1, %lo(X+4)(a4) +; CHECK-NEXT: sw a0, %lo(X)(a4) +; CHECK-NEXT: lw s1, 20(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s1, 216(sp) +; CHECK-NEXT: lw s2, 16(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s2, 220(sp) +; CHECK-NEXT: lw s3, 12(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s3, 224(sp) +; CHECK-NEXT: lw s4, 8(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s4, 228(sp) +; CHECK-NEXT: lw s5, 52(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s5, 232(sp) +; CHECK-NEXT: lw s9, 48(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s9, 236(sp) +; CHECK-NEXT: lw s10, 44(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s10, 240(sp) +; CHECK-NEXT: lw s11, 40(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw s11, 244(sp) ; CHECK-NEXT: addi a0, sp, 248 ; CHECK-NEXT: addi a1, sp, 232 ; CHECK-NEXT: addi a2, sp, 216 -; CHECK-NEXT: lw s1, 52(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s1, 232(sp) -; CHECK-NEXT: lw s2, 48(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s2, 236(sp) -; CHECK-NEXT: lw s3, 44(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s3, 240(sp) -; CHECK-NEXT: lw s4, 40(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw s4, 244(sp) ; CHECK-NEXT: call __multf3 -; CHECK-NEXT: lw a0, 248(sp) -; CHECK-NEXT: sw a0, 36(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw a0, 252(sp) -; CHECK-NEXT: sw a0, 32(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw a0, 256(sp) -; CHECK-NEXT: sw a0, 28(sp) # 4-byte Folded Spill -; CHECK-NEXT: lw a0, 260(sp) -; CHECK-NEXT: sw a0, 24(sp) # 4-byte Folded Spill +; CHECK-NEXT: lw s0, 248(sp) +; CHECK-NEXT: lw s6, 252(sp) +; CHECK-NEXT: lw s7, 256(sp) +; CHECK-NEXT: lw s8, 260(sp) ; CHECK-NEXT: sw zero, 360(sp) ; CHECK-NEXT: sw zero, 364(sp) ; CHECK-NEXT: sw zero, 368(sp) ; CHECK-NEXT: sw zero, 372(sp) +; CHECK-NEXT: lw a0, 36(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a0, 376(sp) +; CHECK-NEXT: lw a0, 32(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a0, 380(sp) +; CHECK-NEXT: lw a0, 28(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a0, 384(sp) +; CHECK-NEXT: lw a0, 24(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a0, 388(sp) ; CHECK-NEXT: addi a0, sp, 392 ; CHECK-NEXT: addi a1, sp, 376 ; CHECK-NEXT: addi a2, sp, 360 -; CHECK-NEXT: sw s0, 376(sp) -; CHECK-NEXT: sw s6, 380(sp) -; CHECK-NEXT: sw s7, 384(sp) -; CHECK-NEXT: sw s8, 388(sp) ; CHECK-NEXT: call __multf3 -; CHECK-NEXT: lw a0, 400(sp) -; CHECK-NEXT: lw a1, 404(sp) -; CHECK-NEXT: lw a2, 392(sp) -; CHECK-NEXT: lw a3, 396(sp) +; CHECK-NEXT: lw a0, 392(sp) +; CHECK-NEXT: lw a1, 396(sp) +; CHECK-NEXT: lw a2, 400(sp) +; CHECK-NEXT: lw a3, 404(sp) ; CHECK-NEXT: lui a4, %hi(S) -; CHECK-NEXT: sw a1, %lo(S+12)(a4) -; CHECK-NEXT: sw a0, %lo(S+8)(a4) -; CHECK-NEXT: sw a3, %lo(S+4)(a4) -; CHECK-NEXT: sw a2, %lo(S)(a4) -; CHECK-NEXT: sw s1, 264(sp) -; CHECK-NEXT: sw s2, 268(sp) -; CHECK-NEXT: sw s3, 
272(sp) -; CHECK-NEXT: sw s4, 276(sp) +; CHECK-NEXT: sw a3, %lo(S+12)(a4) +; CHECK-NEXT: sw a2, %lo(S+8)(a4) +; CHECK-NEXT: sw a1, %lo(S+4)(a4) +; CHECK-NEXT: sw a0, %lo(S)(a4) +; CHECK-NEXT: sw s5, 264(sp) +; CHECK-NEXT: sw s9, 268(sp) +; CHECK-NEXT: sw s10, 272(sp) +; CHECK-NEXT: sw s11, 276(sp) +; CHECK-NEXT: lw a0, 68(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a0, 280(sp) +; CHECK-NEXT: lw a0, 64(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a0, 284(sp) +; CHECK-NEXT: lw a0, 60(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a0, 288(sp) +; CHECK-NEXT: lw a0, 56(sp) # 4-byte Folded Reload +; CHECK-NEXT: sw a0, 292(sp) ; CHECK-NEXT: addi a0, sp, 296 ; CHECK-NEXT: addi a1, sp, 280 ; CHECK-NEXT: addi a2, sp, 264 -; CHECK-NEXT: lw a3, 68(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a3, 280(sp) -; CHECK-NEXT: lw a3, 64(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a3, 284(sp) -; CHECK-NEXT: lw a3, 60(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a3, 288(sp) -; CHECK-NEXT: lw a3, 56(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a3, 292(sp) ; CHECK-NEXT: call __subtf3 -; CHECK-NEXT: lw a0, 304(sp) -; CHECK-NEXT: lw a1, 308(sp) -; CHECK-NEXT: lw a2, 296(sp) -; CHECK-NEXT: lw a3, 300(sp) +; CHECK-NEXT: lw a0, 296(sp) +; CHECK-NEXT: lw a1, 300(sp) +; CHECK-NEXT: lw a2, 304(sp) +; CHECK-NEXT: lw a3, 308(sp) ; CHECK-NEXT: lui a4, %hi(T) -; CHECK-NEXT: sw a1, %lo(T+12)(a4) -; CHECK-NEXT: sw a0, %lo(T+8)(a4) -; CHECK-NEXT: sw a3, %lo(T+4)(a4) -; CHECK-NEXT: sw a2, %lo(T)(a4) +; CHECK-NEXT: sw a3, %lo(T+12)(a4) +; CHECK-NEXT: sw a2, %lo(T+8)(a4) +; CHECK-NEXT: sw a1, %lo(T+4)(a4) +; CHECK-NEXT: sw a0, %lo(T)(a4) ; CHECK-NEXT: sw zero, 168(sp) ; CHECK-NEXT: sw zero, 172(sp) ; CHECK-NEXT: sw zero, 176(sp) ; CHECK-NEXT: sw zero, 180(sp) +; CHECK-NEXT: sw s0, 184(sp) +; CHECK-NEXT: sw s6, 188(sp) +; CHECK-NEXT: sw s7, 192(sp) +; CHECK-NEXT: sw s8, 196(sp) ; CHECK-NEXT: addi a0, sp, 200 ; CHECK-NEXT: addi a1, sp, 184 ; CHECK-NEXT: addi a2, sp, 168 -; CHECK-NEXT: lw a3, 36(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a3, 184(sp) -; CHECK-NEXT: lw a3, 32(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a3, 188(sp) -; CHECK-NEXT: lw a3, 28(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a3, 192(sp) -; CHECK-NEXT: lw a3, 24(sp) # 4-byte Folded Reload -; CHECK-NEXT: sw a3, 196(sp) ; CHECK-NEXT: call __addtf3 -; CHECK-NEXT: lw a0, 208(sp) -; CHECK-NEXT: lw a1, 212(sp) -; CHECK-NEXT: lw a2, 200(sp) -; CHECK-NEXT: lw a3, 204(sp) +; CHECK-NEXT: lw a0, 200(sp) +; CHECK-NEXT: lw a1, 204(sp) +; CHECK-NEXT: lw a2, 208(sp) +; CHECK-NEXT: lw a3, 212(sp) ; CHECK-NEXT: lui a4, %hi(Y) -; CHECK-NEXT: sw a1, %lo(Y+12)(a4) -; CHECK-NEXT: sw a0, %lo(Y+8)(a4) -; CHECK-NEXT: sw a3, %lo(Y+4)(a4) -; CHECK-NEXT: sw a2, %lo(Y)(a4) +; CHECK-NEXT: sw a3, %lo(Y+12)(a4) +; CHECK-NEXT: sw a2, %lo(Y+8)(a4) +; CHECK-NEXT: sw a1, %lo(Y+4)(a4) +; CHECK-NEXT: sw a0, %lo(Y)(a4) ; CHECK-NEXT: sw zero, 120(sp) ; CHECK-NEXT: sw zero, 124(sp) ; CHECK-NEXT: sw zero, 128(sp) ; CHECK-NEXT: sw zero, 132(sp) +; CHECK-NEXT: sw s1, 136(sp) +; CHECK-NEXT: sw s2, 140(sp) +; CHECK-NEXT: sw s3, 144(sp) +; CHECK-NEXT: sw s4, 148(sp) ; CHECK-NEXT: addi a0, sp, 152 ; CHECK-NEXT: addi a1, sp, 136 ; CHECK-NEXT: addi a2, sp, 120 -; CHECK-NEXT: sw s5, 136(sp) -; CHECK-NEXT: sw s9, 140(sp) -; CHECK-NEXT: sw s10, 144(sp) -; CHECK-NEXT: sw s11, 148(sp) ; CHECK-NEXT: call __multf3 -; CHECK-NEXT: lw a3, 152(sp) -; CHECK-NEXT: lw a4, 156(sp) -; CHECK-NEXT: lw a5, 160(sp) -; CHECK-NEXT: lw a6, 164(sp) -; CHECK-NEXT: lui a2, 786400 +; CHECK-NEXT: lw a2, 152(sp) +; CHECK-NEXT: lw a3, 
156(sp) +; CHECK-NEXT: lw a4, 160(sp) +; CHECK-NEXT: lw a5, 164(sp) +; CHECK-NEXT: lui a1, 786400 ; CHECK-NEXT: addi a0, sp, 104 -; CHECK-NEXT: addi a1, sp, 88 ; CHECK-NEXT: sw zero, 72(sp) ; CHECK-NEXT: sw zero, 76(sp) ; CHECK-NEXT: sw zero, 80(sp) -; CHECK-NEXT: sw a2, 84(sp) +; CHECK-NEXT: sw a1, 84(sp) +; CHECK-NEXT: addi a1, sp, 88 +; CHECK-NEXT: sw a2, 88(sp) +; CHECK-NEXT: sw a3, 92(sp) +; CHECK-NEXT: sw a4, 96(sp) +; CHECK-NEXT: sw a5, 100(sp) ; CHECK-NEXT: addi a2, sp, 72 -; CHECK-NEXT: sw a3, 88(sp) -; CHECK-NEXT: sw a4, 92(sp) -; CHECK-NEXT: sw a5, 96(sp) -; CHECK-NEXT: sw a6, 100(sp) ; CHECK-NEXT: call __addtf3 -; CHECK-NEXT: lw a0, 112(sp) -; CHECK-NEXT: lw a1, 116(sp) -; CHECK-NEXT: lw a2, 104(sp) -; CHECK-NEXT: lw a3, 108(sp) +; CHECK-NEXT: lw a0, 104(sp) +; CHECK-NEXT: lw a1, 108(sp) +; CHECK-NEXT: lw a2, 112(sp) +; CHECK-NEXT: lw a3, 116(sp) ; CHECK-NEXT: lui a4, %hi(Y1) -; CHECK-NEXT: sw a0, %lo(Y1+8)(a4) -; CHECK-NEXT: sw a1, %lo(Y1+12)(a4) -; CHECK-NEXT: sw a2, %lo(Y1)(a4) -; CHECK-NEXT: sw a3, %lo(Y1+4)(a4) +; CHECK-NEXT: sw a2, %lo(Y1+8)(a4) +; CHECK-NEXT: sw a3, %lo(Y1+12)(a4) +; CHECK-NEXT: sw a0, %lo(Y1)(a4) +; CHECK-NEXT: sw a1, %lo(Y1+4)(a4) ; CHECK-NEXT: lw ra, 700(sp) # 4-byte Folded Reload ; CHECK-NEXT: lw s0, 696(sp) # 4-byte Folded Reload ; CHECK-NEXT: lw s1, 692(sp) # 4-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/tail-calls.ll b/llvm/test/CodeGen/RISCV/tail-calls.ll index 366b37ac5d472..a6acb2827acea 100644 --- a/llvm/test/CodeGen/RISCV/tail-calls.ll +++ b/llvm/test/CodeGen/RISCV/tail-calls.ll @@ -30,27 +30,23 @@ declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1) define void @caller_extern(ptr %src) optsize { ; CHECK-LABEL: caller_extern: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lui a1, %hi(dest) -; CHECK-NEXT: addi a1, a1, %lo(dest) +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: lui a0, %hi(dest) +; CHECK-NEXT: addi a0, a0, %lo(dest) ; CHECK-NEXT: li a2, 7 -; CHECK-NEXT: mv a3, a0 -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: tail memcpy ; ; CHECK-LARGE-ZICFILP-LABEL: caller_extern: ; CHECK-LARGE-ZICFILP: # %bb.0: # %entry ; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: mv a1, a0 ; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi1: -; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI1_0) +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI1_0) ; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi2: ; CHECK-LARGE-ZICFILP-NEXT: auipc a2, %pcrel_hi(.LCPI1_1) -; CHECK-LARGE-ZICFILP-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi1)(a1) +; CHECK-LARGE-ZICFILP-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi1)(a0) ; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi2)(a2) ; CHECK-LARGE-ZICFILP-NEXT: li a2, 7 -; CHECK-LARGE-ZICFILP-NEXT: mv a3, a0 -; CHECK-LARGE-ZICFILP-NEXT: mv a0, a1 -; CHECK-LARGE-ZICFILP-NEXT: mv a1, a3 ; CHECK-LARGE-ZICFILP-NEXT: jr t2 entry: tail call void @llvm.memcpy.p0.p0.i32(ptr @dest, ptr %src, i32 7, i1 false) @@ -62,27 +58,23 @@ entry: define void @caller_extern_pgso(ptr %src) !prof !14 { ; CHECK-LABEL: caller_extern_pgso: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lui a1, %hi(dest_pgso) -; CHECK-NEXT: addi a1, a1, %lo(dest_pgso) +; CHECK-NEXT: mv a1, a0 +; CHECK-NEXT: lui a0, %hi(dest_pgso) +; CHECK-NEXT: addi a0, a0, %lo(dest_pgso) ; CHECK-NEXT: li a2, 7 -; CHECK-NEXT: mv a3, a0 -; CHECK-NEXT: mv a0, a1 -; CHECK-NEXT: mv a1, a3 ; CHECK-NEXT: tail memcpy ; ; CHECK-LARGE-ZICFILP-LABEL: caller_extern_pgso: ; CHECK-LARGE-ZICFILP: # %bb.0: # %entry ; CHECK-LARGE-ZICFILP-NEXT: lpad 0 +; CHECK-LARGE-ZICFILP-NEXT: mv a1, a0 ; 
CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi3: -; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI2_0) +; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI2_0) ; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi4: ; CHECK-LARGE-ZICFILP-NEXT: auipc a2, %pcrel_hi(.LCPI2_1) -; CHECK-LARGE-ZICFILP-NEXT: lw a1, %pcrel_lo(.Lpcrel_hi3)(a1) +; CHECK-LARGE-ZICFILP-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi3)(a0) ; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi4)(a2) ; CHECK-LARGE-ZICFILP-NEXT: li a2, 7 -; CHECK-LARGE-ZICFILP-NEXT: mv a3, a0 -; CHECK-LARGE-ZICFILP-NEXT: mv a0, a1 -; CHECK-LARGE-ZICFILP-NEXT: mv a1, a3 ; CHECK-LARGE-ZICFILP-NEXT: jr t2 entry: tail call void @llvm.memcpy.p0.p0.i32(ptr @dest_pgso, ptr %src, i32 7, i1 false) @@ -181,10 +173,10 @@ define void @caller_varargs(i32 %a, i32 %b) nounwind { ; CHECK-LARGE-ZICFILP-NEXT: lpad 0 ; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -16 ; CHECK-LARGE-ZICFILP-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; CHECK-LARGE-ZICFILP-NEXT: sw a0, 0(sp) ; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi7: ; CHECK-LARGE-ZICFILP-NEXT: auipc a2, %pcrel_hi(.LCPI5_0) ; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi7)(a2) -; CHECK-LARGE-ZICFILP-NEXT: sw a0, 0(sp) ; CHECK-LARGE-ZICFILP-NEXT: mv a2, a1 ; CHECK-LARGE-ZICFILP-NEXT: mv a3, a0 ; CHECK-LARGE-ZICFILP-NEXT: mv a4, a0 @@ -231,19 +223,19 @@ define i32 @caller_args(i32 %a, i32 %b, i32 %c, i32 %dd, i32 %e, i32 %ff, i32 %g ; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill ; CHECK-LARGE-ZICFILP-NEXT: lw t0, 32(sp) ; CHECK-LARGE-ZICFILP-NEXT: lw t1, 36(sp) -; CHECK-LARGE-ZICFILP-NEXT: lw t3, 40(sp) -; CHECK-LARGE-ZICFILP-NEXT: lw t4, 44(sp) -; CHECK-LARGE-ZICFILP-NEXT: lw t2, 48(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, 40(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t3, 44(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t4, 48(sp) ; CHECK-LARGE-ZICFILP-NEXT: lw t5, 52(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw t2, 16(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t4, 16(sp) ; CHECK-LARGE-ZICFILP-NEXT: sw t5, 20(sp) -; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi8: -; CHECK-LARGE-ZICFILP-NEXT: auipc t2, %pcrel_hi(.LCPI6_0) -; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi8)(t2) ; CHECK-LARGE-ZICFILP-NEXT: sw t0, 0(sp) ; CHECK-LARGE-ZICFILP-NEXT: sw t1, 4(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw t3, 8(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw t4, 12(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t2, 8(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw t3, 12(sp) +; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi8: +; CHECK-LARGE-ZICFILP-NEXT: auipc t0, %pcrel_hi(.LCPI6_0) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi8)(t0) ; CHECK-LARGE-ZICFILP-NEXT: jalr t2 ; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32 @@ -260,12 +252,12 @@ define void @caller_indirect_args() nounwind { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: addi sp, sp, -32 ; CHECK-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; CHECK-NEXT: lui a1, 262128 -; CHECK-NEXT: mv a0, sp +; CHECK-NEXT: lui a0, 262128 ; CHECK-NEXT: sw zero, 0(sp) ; CHECK-NEXT: sw zero, 4(sp) ; CHECK-NEXT: sw zero, 8(sp) -; CHECK-NEXT: sw a1, 12(sp) +; CHECK-NEXT: sw a0, 12(sp) +; CHECK-NEXT: mv a0, sp ; CHECK-NEXT: call callee_indirect_args ; CHECK-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-NEXT: addi sp, sp, 32 @@ -276,15 +268,15 @@ define void @caller_indirect_args() nounwind { ; CHECK-LARGE-ZICFILP-NEXT: lpad 0 ; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, -32 ; CHECK-LARGE-ZICFILP-NEXT: sw ra, 28(sp) # 4-byte Folded Spill -; CHECK-LARGE-ZICFILP-NEXT: lui a1, 262128 +; CHECK-LARGE-ZICFILP-NEXT: lui a0, 
262128 ; CHECK-LARGE-ZICFILP-NEXT: .Lpcrel_hi9: -; CHECK-LARGE-ZICFILP-NEXT: auipc a0, %pcrel_hi(.LCPI7_0) -; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi9)(a0) -; CHECK-LARGE-ZICFILP-NEXT: mv a0, sp +; CHECK-LARGE-ZICFILP-NEXT: auipc a1, %pcrel_hi(.LCPI7_0) ; CHECK-LARGE-ZICFILP-NEXT: sw zero, 0(sp) ; CHECK-LARGE-ZICFILP-NEXT: sw zero, 4(sp) ; CHECK-LARGE-ZICFILP-NEXT: sw zero, 8(sp) -; CHECK-LARGE-ZICFILP-NEXT: sw a1, 12(sp) +; CHECK-LARGE-ZICFILP-NEXT: sw a0, 12(sp) +; CHECK-LARGE-ZICFILP-NEXT: lw t2, %pcrel_lo(.Lpcrel_hi9)(a1) +; CHECK-LARGE-ZICFILP-NEXT: mv a0, sp ; CHECK-LARGE-ZICFILP-NEXT: jalr t2 ; CHECK-LARGE-ZICFILP-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; CHECK-LARGE-ZICFILP-NEXT: addi sp, sp, 32 diff --git a/llvm/test/CodeGen/RISCV/ucmp.ll b/llvm/test/CodeGen/RISCV/ucmp.ll index 50da56fbc5951..e28d98bf3047e 100644 --- a/llvm/test/CodeGen/RISCV/ucmp.ll +++ b/llvm/test/CodeGen/RISCV/ucmp.ll @@ -89,15 +89,15 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind { ; RV32I-NEXT: lw a2, 4(a1) ; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: lw a5, 12(a1) -; RV32I-NEXT: lw a6, 12(a0) ; RV32I-NEXT: lw a3, 4(a0) -; RV32I-NEXT: lw a7, 8(a0) -; RV32I-NEXT: beq a6, a5, .LBB4_2 +; RV32I-NEXT: lw a6, 8(a0) +; RV32I-NEXT: lw a7, 12(a0) +; RV32I-NEXT: beq a7, a5, .LBB4_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t2, a6, a5 +; RV32I-NEXT: sltu t2, a7, a5 ; RV32I-NEXT: j .LBB4_3 ; RV32I-NEXT: .LBB4_2: -; RV32I-NEXT: sltu t2, a7, a4 +; RV32I-NEXT: sltu t2, a6, a4 ; RV32I-NEXT: .LBB4_3: ; RV32I-NEXT: lw a1, 0(a1) ; RV32I-NEXT: lw t0, 0(a0) @@ -108,23 +108,23 @@ define i8 @ucmp.8.128(i128 %x, i128 %y) nounwind { ; RV32I-NEXT: .LBB4_5: ; RV32I-NEXT: sltu a0, t0, a1 ; RV32I-NEXT: .LBB4_6: -; RV32I-NEXT: xor t1, a6, a5 -; RV32I-NEXT: xor t3, a7, a4 +; RV32I-NEXT: xor t1, a7, a5 +; RV32I-NEXT: xor t3, a6, a4 ; RV32I-NEXT: or t1, t3, t1 ; RV32I-NEXT: beqz t1, .LBB4_8 ; RV32I-NEXT: # %bb.7: ; RV32I-NEXT: mv a0, t2 ; RV32I-NEXT: .LBB4_8: -; RV32I-NEXT: beq a6, a5, .LBB4_11 +; RV32I-NEXT: beq a7, a5, .LBB4_11 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: sltu a4, a5, a6 +; RV32I-NEXT: sltu a4, a5, a7 ; RV32I-NEXT: bne a3, a2, .LBB4_12 ; RV32I-NEXT: .LBB4_10: ; RV32I-NEXT: sltu a1, a1, t0 ; RV32I-NEXT: bnez t1, .LBB4_13 ; RV32I-NEXT: j .LBB4_14 ; RV32I-NEXT: .LBB4_11: -; RV32I-NEXT: sltu a4, a4, a7 +; RV32I-NEXT: sltu a4, a4, a6 ; RV32I-NEXT: beq a3, a2, .LBB4_10 ; RV32I-NEXT: .LBB4_12: ; RV32I-NEXT: sltu a1, a2, a3 diff --git a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll index 1cdfaa5c4154b..01a8a66f53f15 100644 --- a/llvm/test/CodeGen/RISCV/unaligned-load-store.ll +++ b/llvm/test/CodeGen/RISCV/unaligned-load-store.ll @@ -26,10 +26,10 @@ define i8 @load_i8(ptr %p) { define i16 @load_i16(ptr %p) { ; SLOW-LABEL: load_i16: ; SLOW: # %bb.0: -; SLOW-NEXT: lbu a1, 1(a0) -; SLOW-NEXT: lbu a0, 0(a0) -; SLOW-NEXT: slli a1, a1, 8 -; SLOW-NEXT: or a0, a1, a0 +; SLOW-NEXT: lbu a1, 0(a0) +; SLOW-NEXT: lbu a0, 1(a0) +; SLOW-NEXT: slli a0, a0, 8 +; SLOW-NEXT: or a0, a0, a1 ; SLOW-NEXT: ret ; ; FAST-LABEL: load_i16: @@ -43,11 +43,11 @@ define i16 @load_i16(ptr %p) { define i24 @load_i24(ptr %p) { ; SLOWBASE-LABEL: load_i24: ; SLOWBASE: # %bb.0: -; SLOWBASE-NEXT: lbu a1, 1(a0) -; SLOWBASE-NEXT: lbu a2, 0(a0) +; SLOWBASE-NEXT: lbu a1, 0(a0) +; SLOWBASE-NEXT: lbu a2, 1(a0) ; SLOWBASE-NEXT: lbu a0, 2(a0) -; SLOWBASE-NEXT: slli a1, a1, 8 -; SLOWBASE-NEXT: or a1, a1, a2 +; SLOWBASE-NEXT: slli a2, a2, 8 +; SLOWBASE-NEXT: or a1, a2, a1 ; SLOWBASE-NEXT: slli a0, a0, 16 ; 
SLOWBASE-NEXT: or a0, a1, a0 ; SLOWBASE-NEXT: ret @@ -73,10 +73,10 @@ define i24 @load_i24(ptr %p) { ; ; FAST-LABEL: load_i24: ; FAST: # %bb.0: -; FAST-NEXT: lbu a1, 2(a0) -; FAST-NEXT: lhu a0, 0(a0) -; FAST-NEXT: slli a1, a1, 16 -; FAST-NEXT: or a0, a0, a1 +; FAST-NEXT: lhu a1, 0(a0) +; FAST-NEXT: lbu a0, 2(a0) +; FAST-NEXT: slli a0, a0, 16 +; FAST-NEXT: or a0, a1, a0 ; FAST-NEXT: ret %res = load i24, ptr %p, align 1 ret i24 %res @@ -85,12 +85,12 @@ define i24 @load_i24(ptr %p) { define i32 @load_i32(ptr %p) { ; SLOWBASE-LABEL: load_i32: ; SLOWBASE: # %bb.0: -; SLOWBASE-NEXT: lbu a1, 1(a0) -; SLOWBASE-NEXT: lbu a2, 0(a0) +; SLOWBASE-NEXT: lbu a1, 0(a0) +; SLOWBASE-NEXT: lbu a2, 1(a0) ; SLOWBASE-NEXT: lbu a3, 2(a0) ; SLOWBASE-NEXT: lbu a0, 3(a0) -; SLOWBASE-NEXT: slli a1, a1, 8 -; SLOWBASE-NEXT: or a1, a1, a2 +; SLOWBASE-NEXT: slli a2, a2, 8 +; SLOWBASE-NEXT: or a1, a2, a1 ; SLOWBASE-NEXT: slli a3, a3, 16 ; SLOWBASE-NEXT: slli a0, a0, 24 ; SLOWBASE-NEXT: or a0, a0, a3 @@ -99,13 +99,13 @@ define i32 @load_i32(ptr %p) { ; ; RV32IZBKB-LABEL: load_i32: ; RV32IZBKB: # %bb.0: -; RV32IZBKB-NEXT: lbu a1, 1(a0) -; RV32IZBKB-NEXT: lbu a2, 2(a0) -; RV32IZBKB-NEXT: lbu a3, 3(a0) -; RV32IZBKB-NEXT: lbu a0, 0(a0) -; RV32IZBKB-NEXT: packh a2, a2, a3 -; RV32IZBKB-NEXT: packh a0, a0, a1 -; RV32IZBKB-NEXT: pack a0, a0, a2 +; RV32IZBKB-NEXT: lbu a1, 0(a0) +; RV32IZBKB-NEXT: lbu a2, 1(a0) +; RV32IZBKB-NEXT: lbu a3, 2(a0) +; RV32IZBKB-NEXT: lbu a0, 3(a0) +; RV32IZBKB-NEXT: packh a0, a3, a0 +; RV32IZBKB-NEXT: packh a1, a1, a2 +; RV32IZBKB-NEXT: pack a0, a1, a0 ; RV32IZBKB-NEXT: ret ; ; RV64IZBKB-LABEL: load_i32: @@ -132,50 +132,50 @@ define i32 @load_i32(ptr %p) { define i64 @load_i64(ptr %p) { ; RV32I-LABEL: load_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a1, 1(a0) -; RV32I-NEXT: lbu a2, 2(a0) -; RV32I-NEXT: lbu a3, 3(a0) -; RV32I-NEXT: lbu a4, 0(a0) -; RV32I-NEXT: slli a1, a1, 8 -; RV32I-NEXT: slli a2, a2, 16 -; RV32I-NEXT: slli a3, a3, 24 -; RV32I-NEXT: or a1, a1, a4 -; RV32I-NEXT: lbu a4, 4(a0) -; RV32I-NEXT: lbu a5, 5(a0) -; RV32I-NEXT: or a2, a3, a2 -; RV32I-NEXT: lbu a3, 6(a0) -; RV32I-NEXT: lbu a0, 7(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: lbu a1, 0(a0) +; RV32I-NEXT: lbu a2, 1(a0) +; RV32I-NEXT: lbu a3, 2(a0) +; RV32I-NEXT: lbu a4, 3(a0) +; RV32I-NEXT: slli a2, a2, 8 ; RV32I-NEXT: slli a3, a3, 16 +; RV32I-NEXT: slli a4, a4, 24 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a2, 4(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 6(a0) +; RV32I-NEXT: lbu a0, 7(a0) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a2, a4, a2 +; RV32I-NEXT: slli a5, a5, 16 ; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a3, a0, a3 -; RV32I-NEXT: or a0, a2, a1 -; RV32I-NEXT: or a1, a3, a4 +; RV32I-NEXT: or a5, a0, a5 +; RV32I-NEXT: or a0, a3, a1 +; RV32I-NEXT: or a1, a5, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: load_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: lbu a1, 1(a0) -; RV64I-NEXT: lbu a2, 2(a0) -; RV64I-NEXT: lbu a3, 3(a0) -; RV64I-NEXT: lbu a4, 0(a0) -; RV64I-NEXT: slli a1, a1, 8 -; RV64I-NEXT: slli a2, a2, 16 -; RV64I-NEXT: slli a3, a3, 24 -; RV64I-NEXT: or a1, a1, a4 -; RV64I-NEXT: lbu a4, 4(a0) -; RV64I-NEXT: lbu a5, 5(a0) -; RV64I-NEXT: or a2, a3, a2 -; RV64I-NEXT: lbu a3, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: lbu a1, 0(a0) +; RV64I-NEXT: lbu a2, 1(a0) +; RV64I-NEXT: lbu a3, 2(a0) +; RV64I-NEXT: lbu a4, 3(a0) +; RV64I-NEXT: slli a2, a2, 8 ; RV64I-NEXT: slli a3, a3, 16 -; 
RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a3 +; RV64I-NEXT: slli a4, a4, 24 ; RV64I-NEXT: or a1, a2, a1 -; RV64I-NEXT: or a0, a0, a4 +; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a2, 4(a0) +; RV64I-NEXT: lbu a4, 5(a0) +; RV64I-NEXT: lbu a5, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a2, a4, a2 +; RV64I-NEXT: slli a5, a5, 16 +; RV64I-NEXT: slli a0, a0, 24 +; RV64I-NEXT: or a0, a0, a5 +; RV64I-NEXT: or a1, a3, a1 +; RV64I-NEXT: or a0, a0, a2 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a1 ; RV64I-NEXT: ret @@ -186,16 +186,16 @@ define i64 @load_i64(ptr %p) { ; RV32IZBKB-NEXT: lbu a2, 1(a0) ; RV32IZBKB-NEXT: lbu a3, 2(a0) ; RV32IZBKB-NEXT: lbu a4, 3(a0) -; RV32IZBKB-NEXT: lbu a5, 5(a0) -; RV32IZBKB-NEXT: lbu a6, 6(a0) -; RV32IZBKB-NEXT: lbu a7, 7(a0) -; RV32IZBKB-NEXT: lbu a0, 4(a0) ; RV32IZBKB-NEXT: packh a3, a3, a4 ; RV32IZBKB-NEXT: packh a1, a1, a2 -; RV32IZBKB-NEXT: packh a2, a6, a7 -; RV32IZBKB-NEXT: packh a4, a0, a5 +; RV32IZBKB-NEXT: lbu a2, 4(a0) +; RV32IZBKB-NEXT: lbu a4, 5(a0) +; RV32IZBKB-NEXT: lbu a5, 6(a0) +; RV32IZBKB-NEXT: lbu a0, 7(a0) +; RV32IZBKB-NEXT: packh a5, a5, a0 +; RV32IZBKB-NEXT: packh a2, a2, a4 ; RV32IZBKB-NEXT: pack a0, a1, a3 -; RV32IZBKB-NEXT: pack a1, a4, a2 +; RV32IZBKB-NEXT: pack a1, a2, a5 ; RV32IZBKB-NEXT: ret ; ; RV64IZBKB-LABEL: load_i64: @@ -204,14 +204,14 @@ define i64 @load_i64(ptr %p) { ; RV64IZBKB-NEXT: lbu a2, 5(a0) ; RV64IZBKB-NEXT: lbu a3, 6(a0) ; RV64IZBKB-NEXT: lbu a4, 7(a0) -; RV64IZBKB-NEXT: lbu a5, 0(a0) -; RV64IZBKB-NEXT: lbu a6, 1(a0) -; RV64IZBKB-NEXT: lbu a7, 2(a0) -; RV64IZBKB-NEXT: lbu a0, 3(a0) ; RV64IZBKB-NEXT: packh a1, a1, a2 ; RV64IZBKB-NEXT: packh a2, a3, a4 -; RV64IZBKB-NEXT: packh a3, a5, a6 -; RV64IZBKB-NEXT: packh a0, a7, a0 +; RV64IZBKB-NEXT: lbu a3, 0(a0) +; RV64IZBKB-NEXT: lbu a4, 1(a0) +; RV64IZBKB-NEXT: lbu a5, 2(a0) +; RV64IZBKB-NEXT: lbu a0, 3(a0) +; RV64IZBKB-NEXT: packh a3, a3, a4 +; RV64IZBKB-NEXT: packh a0, a5, a0 ; RV64IZBKB-NEXT: slli a2, a2, 16 ; RV64IZBKB-NEXT: slli a0, a0, 16 ; RV64IZBKB-NEXT: or a1, a2, a1 diff --git a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll index c73a18c8869d5..106acff8fab95 100644 --- a/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/urem-seteq-illegal-types.ll @@ -331,13 +331,13 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32-NEXT: sw s4, 8(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 -; RV32-NEXT: lbu a0, 4(a0) -; RV32-NEXT: lw a1, 0(s0) -; RV32-NEXT: slli a0, a0, 10 -; RV32-NEXT: srli s1, a1, 22 -; RV32-NEXT: or s1, s1, a0 -; RV32-NEXT: srli s2, a1, 11 -; RV32-NEXT: andi a0, a1, 2047 +; RV32-NEXT: lw a0, 0(a0) +; RV32-NEXT: lbu a1, 4(s0) +; RV32-NEXT: slli a1, a1, 10 +; RV32-NEXT: srli s1, a0, 22 +; RV32-NEXT: or s1, s1, a1 +; RV32-NEXT: srli s2, a0, 11 +; RV32-NEXT: andi a0, a0, 2047 ; RV32-NEXT: li a1, 683 ; RV32-NEXT: call __mulsi3 ; RV32-NEXT: slli a1, a0, 10 @@ -388,10 +388,10 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64-NEXT: mv s0, a0 -; RV64-NEXT: lbu a0, 4(a0) -; RV64-NEXT: lwu a1, 0(s0) -; RV64-NEXT: slli a0, a0, 32 -; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: lwu a0, 0(a0) +; RV64-NEXT: lbu a1, 4(s0) +; RV64-NEXT: slli a1, a1, 32 +; RV64-NEXT: or a0, a0, a1 ; RV64-NEXT: srli s1, a0, 22 ; RV64-NEXT: srli s2, a0, 11 ; RV64-NEXT: andi a0, a0, 2047 @@ 
-438,40 +438,40 @@ define void @test_urem_vec(ptr %X) nounwind { ; ; RV32M-LABEL: test_urem_vec: ; RV32M: # %bb.0: -; RV32M-NEXT: lbu a1, 4(a0) -; RV32M-NEXT: lw a2, 0(a0) +; RV32M-NEXT: lw a1, 0(a0) +; RV32M-NEXT: lbu a2, 4(a0) ; RV32M-NEXT: li a3, 683 ; RV32M-NEXT: li a4, 819 -; RV32M-NEXT: slli a1, a1, 10 -; RV32M-NEXT: srli a5, a2, 22 -; RV32M-NEXT: or a1, a5, a1 -; RV32M-NEXT: andi a5, a2, 2047 +; RV32M-NEXT: slli a2, a2, 10 +; RV32M-NEXT: srli a5, a1, 22 +; RV32M-NEXT: or a2, a5, a2 +; RV32M-NEXT: andi a5, a1, 2047 ; RV32M-NEXT: mul a3, a5, a3 ; RV32M-NEXT: li a5, 1463 -; RV32M-NEXT: srli a2, a2, 11 -; RV32M-NEXT: mul a2, a2, a5 +; RV32M-NEXT: srli a1, a1, 11 +; RV32M-NEXT: mul a1, a1, a5 ; RV32M-NEXT: slli a5, a3, 10 ; RV32M-NEXT: slli a3, a3, 21 -; RV32M-NEXT: mul a1, a1, a4 -; RV32M-NEXT: addi a2, a2, -1463 +; RV32M-NEXT: mul a2, a2, a4 +; RV32M-NEXT: addi a1, a1, -1463 ; RV32M-NEXT: srli a3, a3, 22 -; RV32M-NEXT: addi a1, a1, -1638 -; RV32M-NEXT: andi a2, a2, 2047 -; RV32M-NEXT: or a3, a3, a5 +; RV32M-NEXT: addi a2, a2, -1638 ; RV32M-NEXT: andi a1, a1, 2047 -; RV32M-NEXT: sltiu a2, a2, 293 +; RV32M-NEXT: or a3, a3, a5 +; RV32M-NEXT: andi a2, a2, 2047 +; RV32M-NEXT: sltiu a1, a1, 293 ; RV32M-NEXT: andi a3, a3, 2047 -; RV32M-NEXT: sltiu a1, a1, 2 -; RV32M-NEXT: addi a2, a2, -1 -; RV32M-NEXT: sltiu a3, a3, 342 -; RV32M-NEXT: xori a4, a1, 1 +; RV32M-NEXT: sltiu a2, a2, 2 ; RV32M-NEXT: addi a1, a1, -1 -; RV32M-NEXT: andi a2, a2, 2047 +; RV32M-NEXT: sltiu a3, a3, 342 +; RV32M-NEXT: xori a4, a2, 1 +; RV32M-NEXT: addi a2, a2, -1 +; RV32M-NEXT: andi a1, a1, 2047 ; RV32M-NEXT: addi a3, a3, -1 -; RV32M-NEXT: slli a2, a2, 11 -; RV32M-NEXT: slli a1, a1, 22 +; RV32M-NEXT: slli a1, a1, 11 +; RV32M-NEXT: slli a2, a2, 22 ; RV32M-NEXT: andi a3, a3, 2047 -; RV32M-NEXT: or a1, a2, a1 +; RV32M-NEXT: or a1, a1, a2 ; RV32M-NEXT: or a1, a3, a1 ; RV32M-NEXT: sw a1, 0(a0) ; RV32M-NEXT: sb a4, 4(a0) @@ -479,12 +479,12 @@ define void @test_urem_vec(ptr %X) nounwind { ; ; RV64M-LABEL: test_urem_vec: ; RV64M: # %bb.0: -; RV64M-NEXT: lbu a1, 4(a0) -; RV64M-NEXT: lwu a2, 0(a0) +; RV64M-NEXT: lwu a1, 0(a0) +; RV64M-NEXT: lbu a2, 4(a0) ; RV64M-NEXT: li a3, 683 ; RV64M-NEXT: li a4, 1463 -; RV64M-NEXT: slli a1, a1, 32 -; RV64M-NEXT: or a1, a2, a1 +; RV64M-NEXT: slli a2, a2, 32 +; RV64M-NEXT: or a1, a1, a2 ; RV64M-NEXT: andi a2, a1, 2047 ; RV64M-NEXT: mul a2, a2, a3 ; RV64M-NEXT: srli a3, a1, 11 @@ -538,15 +538,9 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32MV-NEXT: srli a1, a1, 21 ; RV32MV-NEXT: vslide1down.vx v10, v10, a1 ; RV32MV-NEXT: li a1, 2047 -; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV32MV-NEXT: vmv.v.i v11, 1 +; RV32MV-NEXT: addi a3, a3, -1527 ; RV32MV-NEXT: andi a2, a2, 2047 -; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV32MV-NEXT: vslide1down.vx v10, v10, a2 -; RV32MV-NEXT: lui a2, %hi(.LCPI4_1) -; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_1) -; RV32MV-NEXT: addi a3, a3, -1527 -; RV32MV-NEXT: vsext.vf2 v12, v11 ; RV32MV-NEXT: vslidedown.vi v10, v10, 1 ; RV32MV-NEXT: vsub.vv v8, v10, v8 ; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -556,14 +550,20 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV32MV-NEXT: vmul.vv v8, v8, v9 ; RV32MV-NEXT: vadd.vv v9, v8, v8 ; RV32MV-NEXT: vsll.vv v9, v9, v11 -; RV32MV-NEXT: vle16.v v10, (a2) +; RV32MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV32MV-NEXT: vmv.v.i v10, 1 +; RV32MV-NEXT: lui a2, %hi(.LCPI4_1) +; RV32MV-NEXT: addi a2, a2, %lo(.LCPI4_1) +; RV32MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV32MV-NEXT: vsext.vf2 
v11, v10 ; RV32MV-NEXT: vand.vx v8, v8, a1 -; RV32MV-NEXT: vsrl.vv v8, v8, v12 +; RV32MV-NEXT: vsrl.vv v8, v8, v11 +; RV32MV-NEXT: vmv.v.i v10, 0 ; RV32MV-NEXT: vor.vv v8, v8, v9 +; RV32MV-NEXT: vle16.v v9, (a2) ; RV32MV-NEXT: vand.vx v8, v8, a1 -; RV32MV-NEXT: vmsltu.vv v0, v10, v8 -; RV32MV-NEXT: vmv.v.i v8, 0 -; RV32MV-NEXT: vmerge.vim v8, v8, -1, v0 +; RV32MV-NEXT: vmsltu.vv v0, v9, v8 +; RV32MV-NEXT: vmerge.vim v8, v10, -1, v0 ; RV32MV-NEXT: vslidedown.vi v9, v8, 2 ; RV32MV-NEXT: vmv.x.s a1, v8 ; RV32MV-NEXT: vslidedown.vi v8, v8, 1 @@ -599,15 +599,9 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64MV-NEXT: srli a2, a2, 53 ; RV64MV-NEXT: vslide1down.vx v10, v10, a2 ; RV64MV-NEXT: li a2, 2047 -; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma -; RV64MV-NEXT: vmv.v.i v11, 1 +; RV64MV-NEXT: addi a3, a3, -1527 ; RV64MV-NEXT: srli a1, a1, 22 -; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma ; RV64MV-NEXT: vslide1down.vx v10, v10, a1 -; RV64MV-NEXT: lui a1, %hi(.LCPI4_1) -; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_1) -; RV64MV-NEXT: addi a3, a3, -1527 -; RV64MV-NEXT: vsext.vf2 v12, v11 ; RV64MV-NEXT: vslidedown.vi v10, v10, 1 ; RV64MV-NEXT: vsub.vv v8, v10, v8 ; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma @@ -617,14 +611,20 @@ define void @test_urem_vec(ptr %X) nounwind { ; RV64MV-NEXT: vmul.vv v8, v8, v9 ; RV64MV-NEXT: vadd.vv v9, v8, v8 ; RV64MV-NEXT: vsll.vv v9, v9, v11 -; RV64MV-NEXT: vle16.v v10, (a1) +; RV64MV-NEXT: vsetvli zero, zero, e32, m1, ta, ma +; RV64MV-NEXT: vmv.v.i v10, 1 +; RV64MV-NEXT: lui a1, %hi(.LCPI4_1) +; RV64MV-NEXT: addi a1, a1, %lo(.LCPI4_1) +; RV64MV-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64MV-NEXT: vsext.vf2 v11, v10 ; RV64MV-NEXT: vand.vx v8, v8, a2 -; RV64MV-NEXT: vsrl.vv v8, v8, v12 +; RV64MV-NEXT: vsrl.vv v8, v8, v11 +; RV64MV-NEXT: vmv.v.i v10, 0 ; RV64MV-NEXT: vor.vv v8, v8, v9 +; RV64MV-NEXT: vle16.v v9, (a1) ; RV64MV-NEXT: vand.vx v8, v8, a2 -; RV64MV-NEXT: vmsltu.vv v0, v10, v8 -; RV64MV-NEXT: vmv.v.i v8, 0 -; RV64MV-NEXT: vmerge.vim v8, v8, -1, v0 +; RV64MV-NEXT: vmsltu.vv v0, v9, v8 +; RV64MV-NEXT: vmerge.vim v8, v10, -1, v0 ; RV64MV-NEXT: vmv.x.s a1, v8 ; RV64MV-NEXT: vslidedown.vi v9, v8, 1 ; RV64MV-NEXT: vslidedown.vi v8, v8, 2 diff --git a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll index 988856ca70923..c9d9ed13faa08 100644 --- a/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/RISCV/urem-vector-lkk.ll @@ -19,30 +19,29 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu a2, 0(a1) -; RV32I-NEXT: lhu s0, 4(a1) -; RV32I-NEXT: lhu s1, 8(a1) -; RV32I-NEXT: lhu s2, 12(a1) -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a0, 0(a1) +; RV32I-NEXT: lhu s1, 4(a1) +; RV32I-NEXT: lhu s2, 8(a1) +; RV32I-NEXT: lhu s3, 12(a1) ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 124 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: li a1, 98 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: mv s1, a0 -; RV32I-NEXT: li a1, 1003 +; RV32I-NEXT: li a1, 98 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: sh s4, 0(s3) -; RV32I-NEXT: sh s0, 2(s3) -; RV32I-NEXT: sh s1, 4(s3) -; RV32I-NEXT: sh a0, 6(s3) +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: li a1, 
1003 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: sh s4, 0(s0) +; RV32I-NEXT: sh s1, 2(s0) +; RV32I-NEXT: sh s2, 4(s0) +; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -98,30 +97,29 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu a2, 0(a1) -; RV64I-NEXT: lhu s0, 8(a1) -; RV64I-NEXT: lhu s1, 16(a1) -; RV64I-NEXT: lhu s2, 24(a1) -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a0, 0(a1) +; RV64I-NEXT: lhu s1, 8(a1) +; RV64I-NEXT: lhu s2, 16(a1) +; RV64I-NEXT: lhu s3, 24(a1) ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 124 -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: li a1, 98 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: mv s1, a0 -; RV64I-NEXT: li a1, 1003 +; RV64I-NEXT: li a1, 98 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: sh s4, 0(s3) -; RV64I-NEXT: sh s0, 2(s3) -; RV64I-NEXT: sh s1, 4(s3) -; RV64I-NEXT: sh a0, 6(s3) +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: li a1, 1003 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: sh s4, 0(s0) +; RV64I-NEXT: sh s1, 2(s0) +; RV64I-NEXT: sh s2, 4(s0) +; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -140,18 +138,18 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) nounwind { ; RV64IM-NEXT: lui a5, %hi(.LCPI0_0) ; RV64IM-NEXT: lui a6, %hi(.LCPI0_1) ; RV64IM-NEXT: li a7, 95 -; RV64IM-NEXT: ld a6, %lo(.LCPI0_1)(a6) ; RV64IM-NEXT: lui t0, %hi(.LCPI0_2) ; RV64IM-NEXT: li t1, 98 -; RV64IM-NEXT: ld t0, %lo(.LCPI0_2)(t0) +; RV64IM-NEXT: ld a6, %lo(.LCPI0_1)(a6) ; RV64IM-NEXT: mulhu a6, a2, a6 ; RV64IM-NEXT: mul a6, a6, a7 ; RV64IM-NEXT: lui a7, %hi(.LCPI0_3) -; RV64IM-NEXT: ld a5, %lo(.LCPI0_0)(a5) -; RV64IM-NEXT: ld a7, %lo(.LCPI0_3)(a7) +; RV64IM-NEXT: ld t0, %lo(.LCPI0_2)(t0) ; RV64IM-NEXT: mulhu t0, a4, t0 ; RV64IM-NEXT: mul t0, t0, t1 ; RV64IM-NEXT: li t1, 1003 +; RV64IM-NEXT: ld a5, %lo(.LCPI0_0)(a5) +; RV64IM-NEXT: ld a7, %lo(.LCPI0_3)(a7) ; RV64IM-NEXT: mulhu a5, a3, a5 ; RV64IM-NEXT: mulhu a7, a1, a7 ; RV64IM-NEXT: mul a7, a7, t1 @@ -181,30 +179,29 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s4, 8(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu a2, 0(a1) -; RV32I-NEXT: lhu s0, 4(a1) -; RV32I-NEXT: lhu s1, 8(a1) -; RV32I-NEXT: lhu s2, 12(a1) -; RV32I-NEXT: mv s3, a0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a0, 0(a1) +; RV32I-NEXT: lhu s1, 4(a1) +; RV32I-NEXT: lhu s2, 8(a1) +; RV32I-NEXT: lhu s3, 12(a1) ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: mv s4, a0 ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, s0 -; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: mv s0, a0 -; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: sh s4, 0(s3) -; RV32I-NEXT: sh s0, 2(s3) -; 
RV32I-NEXT: sh s1, 4(s3) -; RV32I-NEXT: sh a0, 6(s3) +; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: li a1, 95 +; RV32I-NEXT: mv a0, s3 +; RV32I-NEXT: call __umodsi3 +; RV32I-NEXT: sh s4, 0(s0) +; RV32I-NEXT: sh s1, 2(s0) +; RV32I-NEXT: sh s2, 4(s0) +; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -250,30 +247,29 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s4, 0(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu a2, 0(a1) -; RV64I-NEXT: lhu s0, 8(a1) -; RV64I-NEXT: lhu s1, 16(a1) -; RV64I-NEXT: lhu s2, 24(a1) -; RV64I-NEXT: mv s3, a0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a0, 0(a1) +; RV64I-NEXT: lhu s1, 8(a1) +; RV64I-NEXT: lhu s2, 16(a1) +; RV64I-NEXT: lhu s3, 24(a1) ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: mv s4, a0 ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, s0 -; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: mv s0, a0 -; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: sh s4, 0(s3) -; RV64I-NEXT: sh s0, 2(s3) -; RV64I-NEXT: sh s1, 4(s3) -; RV64I-NEXT: sh a0, 6(s3) +; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: li a1, 95 +; RV64I-NEXT: mv a0, s3 +; RV64I-NEXT: call __umoddi3 +; RV64I-NEXT: sh s4, 0(s0) +; RV64I-NEXT: sh s1, 2(s0) +; RV64I-NEXT: sh s2, 4(s0) +; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -285,28 +281,28 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: fold_urem_vec_2: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lui a2, %hi(.LCPI1_0) -; RV64IM-NEXT: ld a2, %lo(.LCPI1_0)(a2) -; RV64IM-NEXT: lhu a3, 0(a1) -; RV64IM-NEXT: lhu a4, 8(a1) -; RV64IM-NEXT: lhu a5, 16(a1) +; RV64IM-NEXT: lhu a2, 0(a1) +; RV64IM-NEXT: lhu a3, 8(a1) +; RV64IM-NEXT: lhu a4, 16(a1) ; RV64IM-NEXT: lhu a1, 24(a1) +; RV64IM-NEXT: lui a5, %hi(.LCPI1_0) ; RV64IM-NEXT: li a6, 95 -; RV64IM-NEXT: mulhu a7, a3, a2 -; RV64IM-NEXT: mulhu t0, a4, a2 -; RV64IM-NEXT: mulhu t1, a5, a2 -; RV64IM-NEXT: mulhu a2, a1, a2 +; RV64IM-NEXT: ld a5, %lo(.LCPI1_0)(a5) +; RV64IM-NEXT: mulhu a7, a2, a5 +; RV64IM-NEXT: mulhu t0, a3, a5 +; RV64IM-NEXT: mulhu t1, a4, a5 +; RV64IM-NEXT: mulhu a5, a1, a5 ; RV64IM-NEXT: mul a7, a7, a6 ; RV64IM-NEXT: mul t0, t0, a6 ; RV64IM-NEXT: mul t1, t1, a6 -; RV64IM-NEXT: mul a2, a2, a6 -; RV64IM-NEXT: subw a3, a3, a7 -; RV64IM-NEXT: subw a4, a4, t0 -; RV64IM-NEXT: subw a5, a5, t1 -; RV64IM-NEXT: subw a1, a1, a2 -; RV64IM-NEXT: sh a3, 0(a0) -; RV64IM-NEXT: sh a4, 2(a0) -; RV64IM-NEXT: sh a5, 4(a0) +; RV64IM-NEXT: mul a5, a5, a6 +; RV64IM-NEXT: subw a2, a2, a7 +; RV64IM-NEXT: subw a3, a3, t0 +; RV64IM-NEXT: subw a4, a4, t1 +; RV64IM-NEXT: subw a1, a1, a5 +; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: sh a3, 2(a0) +; RV64IM-NEXT: sh a4, 4(a0) ; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, @@ -329,11 +325,11 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu s1, 0(a1) ; RV32I-NEXT: lhu 
s2, 4(a1) ; RV32I-NEXT: lhu s3, 8(a1) ; RV32I-NEXT: lhu s4, 12(a1) -; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a1, 95 ; RV32I-NEXT: mv a0, s4 ; RV32I-NEXT: call __umodsi3 @@ -430,11 +426,11 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s6, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s7, 8(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s8, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu s1, 0(a1) ; RV64I-NEXT: lhu s2, 8(a1) ; RV64I-NEXT: lhu s3, 16(a1) ; RV64I-NEXT: lhu s4, 24(a1) -; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: li a1, 95 ; RV64I-NEXT: mv a0, s4 ; RV64I-NEXT: call __umoddi3 @@ -489,33 +485,33 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) nounwind { ; ; RV64IM-LABEL: combine_urem_udiv: ; RV64IM: # %bb.0: -; RV64IM-NEXT: lhu a2, 16(a1) -; RV64IM-NEXT: lhu a3, 24(a1) -; RV64IM-NEXT: lui a4, %hi(.LCPI2_0) -; RV64IM-NEXT: ld a4, %lo(.LCPI2_0)(a4) -; RV64IM-NEXT: lhu a5, 0(a1) -; RV64IM-NEXT: lhu a1, 8(a1) +; RV64IM-NEXT: lhu a2, 0(a1) +; RV64IM-NEXT: lhu a3, 8(a1) +; RV64IM-NEXT: lhu a4, 16(a1) +; RV64IM-NEXT: lhu a1, 24(a1) +; RV64IM-NEXT: lui a5, %hi(.LCPI2_0) ; RV64IM-NEXT: li a6, 95 -; RV64IM-NEXT: mulhu a7, a3, a4 -; RV64IM-NEXT: mulhu t0, a2, a4 -; RV64IM-NEXT: mulhu t1, a1, a4 -; RV64IM-NEXT: mulhu a4, a5, a4 +; RV64IM-NEXT: ld a5, %lo(.LCPI2_0)(a5) +; RV64IM-NEXT: mulhu a7, a1, a5 +; RV64IM-NEXT: mulhu t0, a4, a5 +; RV64IM-NEXT: mulhu t1, a3, a5 +; RV64IM-NEXT: mulhu a5, a2, a5 ; RV64IM-NEXT: mul t2, a7, a6 ; RV64IM-NEXT: mul t3, t0, a6 ; RV64IM-NEXT: mul t4, t1, a6 -; RV64IM-NEXT: mul a6, a4, a6 -; RV64IM-NEXT: add a4, a5, a4 -; RV64IM-NEXT: add a1, a1, t1 -; RV64IM-NEXT: add a2, a2, t0 -; RV64IM-NEXT: add a3, a3, a7 -; RV64IM-NEXT: subw a4, a4, a6 -; RV64IM-NEXT: subw a1, a1, t4 -; RV64IM-NEXT: subw a2, a2, t3 -; RV64IM-NEXT: subw a3, a3, t2 -; RV64IM-NEXT: sh a4, 0(a0) -; RV64IM-NEXT: sh a1, 2(a0) -; RV64IM-NEXT: sh a2, 4(a0) -; RV64IM-NEXT: sh a3, 6(a0) +; RV64IM-NEXT: mul a6, a5, a6 +; RV64IM-NEXT: add a2, a2, a5 +; RV64IM-NEXT: add a3, a3, t1 +; RV64IM-NEXT: add a4, a4, t0 +; RV64IM-NEXT: add a1, a1, a7 +; RV64IM-NEXT: subw a2, a2, a6 +; RV64IM-NEXT: subw a3, a3, t4 +; RV64IM-NEXT: subw a4, a4, t3 +; RV64IM-NEXT: subw a1, a1, t2 +; RV64IM-NEXT: sh a2, 0(a0) +; RV64IM-NEXT: sh a3, 2(a0) +; RV64IM-NEXT: sh a4, 4(a0) +; RV64IM-NEXT: sh a1, 6(a0) ; RV64IM-NEXT: ret %1 = urem <4 x i16> %x, %2 = udiv <4 x i16> %x, @@ -533,13 +529,12 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lhu s1, 0(a1) ; RV32I-NEXT: lhu s2, 4(a1) ; RV32I-NEXT: lhu s3, 8(a1) -; RV32I-NEXT: lhu a2, 12(a1) -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a0, 12(a1) ; RV32I-NEXT: li a1, 95 -; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: andi a1, s1, 63 ; RV32I-NEXT: andi a2, s2, 31 @@ -585,13 +580,12 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: mv s0, a0 ; RV64I-NEXT: lhu s1, 0(a1) ; RV64I-NEXT: lhu s2, 8(a1) ; RV64I-NEXT: lhu s3, 16(a1) -; RV64I-NEXT: lhu a2, 24(a1) -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a0, 24(a1) ; RV64I-NEXT: li a1, 95 -; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: andi a1, s1, 63 ; 
RV64I-NEXT: andi a2, s2, 31 @@ -642,26 +636,25 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV32I-NEXT: sw s1, 20(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s2, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s3, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lhu a2, 4(a1) -; RV32I-NEXT: lhu s0, 8(a1) -; RV32I-NEXT: lhu s1, 12(a1) -; RV32I-NEXT: mv s2, a0 +; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: lhu a0, 4(a1) +; RV32I-NEXT: lhu s1, 8(a1) +; RV32I-NEXT: lhu s2, 12(a1) ; RV32I-NEXT: li a1, 654 -; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: call __umodsi3 ; RV32I-NEXT: mv s3, a0 ; RV32I-NEXT: li a1, 23 -; RV32I-NEXT: mv a0, s0 +; RV32I-NEXT: mv a0, s1 ; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: mv s0, a0 +; RV32I-NEXT: mv s1, a0 ; RV32I-NEXT: lui a0, 1 ; RV32I-NEXT: addi a1, a0, 1327 -; RV32I-NEXT: mv a0, s1 +; RV32I-NEXT: mv a0, s2 ; RV32I-NEXT: call __umodsi3 -; RV32I-NEXT: sh zero, 0(s2) -; RV32I-NEXT: sh s3, 2(s2) -; RV32I-NEXT: sh s0, 4(s2) -; RV32I-NEXT: sh a0, 6(s2) +; RV32I-NEXT: sh zero, 0(s0) +; RV32I-NEXT: sh s3, 2(s0) +; RV32I-NEXT: sh s1, 4(s0) +; RV32I-NEXT: sh a0, 6(s0) ; RV32I-NEXT: lw ra, 28(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s0, 24(sp) # 4-byte Folded Reload ; RV32I-NEXT: lw s1, 20(sp) # 4-byte Folded Reload @@ -708,26 +701,25 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: lhu a2, 8(a1) -; RV64I-NEXT: lhu s0, 16(a1) -; RV64I-NEXT: lhu s1, 24(a1) -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: lhu a0, 8(a1) +; RV64I-NEXT: lhu s1, 16(a1) +; RV64I-NEXT: lhu s2, 24(a1) ; RV64I-NEXT: li a1, 654 -; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 23 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: sh zero, 0(s2) -; RV64I-NEXT: sh s3, 2(s2) -; RV64I-NEXT: sh s0, 4(s2) -; RV64I-NEXT: sh a0, 6(s2) +; RV64I-NEXT: sh zero, 0(s0) +; RV64I-NEXT: sh s3, 2(s0) +; RV64I-NEXT: sh s1, 4(s0) +; RV64I-NEXT: sh a0, 6(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -743,17 +735,17 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) nounwind { ; RV64IM-NEXT: lhu a1, 24(a1) ; RV64IM-NEXT: lui a4, %hi(.LCPI4_0) ; RV64IM-NEXT: li a5, 654 -; RV64IM-NEXT: ld a4, %lo(.LCPI4_0)(a4) ; RV64IM-NEXT: lui a6, %hi(.LCPI4_1) ; RV64IM-NEXT: li a7, 23 -; RV64IM-NEXT: ld a6, %lo(.LCPI4_1)(a6) +; RV64IM-NEXT: ld a4, %lo(.LCPI4_0)(a4) ; RV64IM-NEXT: mulhu a4, a2, a4 ; RV64IM-NEXT: mul a4, a4, a5 ; RV64IM-NEXT: lui a5, %hi(.LCPI4_2) -; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5) +; RV64IM-NEXT: ld a6, %lo(.LCPI4_1)(a6) ; RV64IM-NEXT: mulhu a6, a3, a6 ; RV64IM-NEXT: mul a6, a6, a7 ; RV64IM-NEXT: lui a7, 1 +; RV64IM-NEXT: ld a5, %lo(.LCPI4_2)(a5) ; RV64IM-NEXT: addi a7, a7, 1327 ; RV64IM-NEXT: mulhu a5, a1, a5 ; RV64IM-NEXT: mul a5, a5, a7 @@ -793,18 +785,17 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32I-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32I-NEXT: sw s8, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: lw s1, 16(a1) ; 
RV32I-NEXT: lw s2, 20(a1) ; RV32I-NEXT: lw s3, 24(a1) ; RV32I-NEXT: lw s4, 28(a1) -; RV32I-NEXT: lw a3, 0(a1) -; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a0, 0(a1) +; RV32I-NEXT: lw a3, 4(a1) ; RV32I-NEXT: lw s5, 8(a1) ; RV32I-NEXT: lw s6, 12(a1) -; RV32I-NEXT: mv s0, a0 ; RV32I-NEXT: li a2, 1 -; RV32I-NEXT: mv a0, a3 -; RV32I-NEXT: mv a1, a4 +; RV32I-NEXT: mv a1, a3 ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: call __umoddi3 ; RV32I-NEXT: mv s7, a0 @@ -863,18 +854,17 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV32IM-NEXT: sw s6, 16(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s7, 12(sp) # 4-byte Folded Spill ; RV32IM-NEXT: sw s8, 8(sp) # 4-byte Folded Spill +; RV32IM-NEXT: mv s0, a0 ; RV32IM-NEXT: lw s1, 16(a1) ; RV32IM-NEXT: lw s2, 20(a1) ; RV32IM-NEXT: lw s3, 24(a1) ; RV32IM-NEXT: lw s4, 28(a1) -; RV32IM-NEXT: lw a3, 0(a1) -; RV32IM-NEXT: lw a4, 4(a1) +; RV32IM-NEXT: lw a0, 0(a1) +; RV32IM-NEXT: lw a3, 4(a1) ; RV32IM-NEXT: lw s5, 8(a1) ; RV32IM-NEXT: lw s6, 12(a1) -; RV32IM-NEXT: mv s0, a0 ; RV32IM-NEXT: li a2, 1 -; RV32IM-NEXT: mv a0, a3 -; RV32IM-NEXT: mv a1, a4 +; RV32IM-NEXT: mv a1, a3 ; RV32IM-NEXT: li a3, 0 ; RV32IM-NEXT: call __umoddi3 ; RV32IM-NEXT: mv s7, a0 @@ -928,26 +918,25 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV64I-NEXT: sd s1, 24(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s2, 16(sp) # 8-byte Folded Spill ; RV64I-NEXT: sd s3, 8(sp) # 8-byte Folded Spill -; RV64I-NEXT: ld a2, 8(a1) -; RV64I-NEXT: ld s0, 16(a1) -; RV64I-NEXT: ld s1, 24(a1) -; RV64I-NEXT: mv s2, a0 +; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: ld a0, 8(a1) +; RV64I-NEXT: ld s1, 16(a1) +; RV64I-NEXT: ld s2, 24(a1) ; RV64I-NEXT: li a1, 654 -; RV64I-NEXT: mv a0, a2 ; RV64I-NEXT: call __umoddi3 ; RV64I-NEXT: mv s3, a0 ; RV64I-NEXT: li a1, 23 -; RV64I-NEXT: mv a0, s0 +; RV64I-NEXT: mv a0, s1 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: mv s0, a0 +; RV64I-NEXT: mv s1, a0 ; RV64I-NEXT: lui a0, 1 ; RV64I-NEXT: addiw a1, a0, 1327 -; RV64I-NEXT: mv a0, s1 +; RV64I-NEXT: mv a0, s2 ; RV64I-NEXT: call __umoddi3 -; RV64I-NEXT: sd zero, 0(s2) -; RV64I-NEXT: sd s3, 8(s2) -; RV64I-NEXT: sd s0, 16(s2) -; RV64I-NEXT: sd a0, 24(s2) +; RV64I-NEXT: sd zero, 0(s0) +; RV64I-NEXT: sd s3, 8(s0) +; RV64I-NEXT: sd s1, 16(s0) +; RV64I-NEXT: sd a0, 24(s0) ; RV64I-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s0, 32(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 24(sp) # 8-byte Folded Reload @@ -961,31 +950,31 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) nounwind { ; RV64IM-NEXT: ld a2, 8(a1) ; RV64IM-NEXT: ld a3, 16(a1) ; RV64IM-NEXT: ld a1, 24(a1) -; RV64IM-NEXT: lui a4, %hi(.LCPI6_1) -; RV64IM-NEXT: ld a4, %lo(.LCPI6_1)(a4) -; RV64IM-NEXT: lui a5, %hi(.LCPI6_0) +; RV64IM-NEXT: lui a4, %hi(.LCPI6_0) +; RV64IM-NEXT: lui a5, %hi(.LCPI6_1) ; RV64IM-NEXT: li a6, 654 +; RV64IM-NEXT: ld a5, %lo(.LCPI6_1)(a5) ; RV64IM-NEXT: srli a7, a2, 1 -; RV64IM-NEXT: mulhu a4, a7, a4 +; RV64IM-NEXT: mulhu a5, a7, a5 ; RV64IM-NEXT: lui a7, %hi(.LCPI6_2) -; RV64IM-NEXT: ld a5, %lo(.LCPI6_0)(a5) -; RV64IM-NEXT: ld a7, %lo(.LCPI6_2)(a7) -; RV64IM-NEXT: srli a4, a4, 7 -; RV64IM-NEXT: mul a4, a4, a6 +; RV64IM-NEXT: srli a5, a5, 7 +; RV64IM-NEXT: mul a5, a5, a6 ; RV64IM-NEXT: lui a6, 1 +; RV64IM-NEXT: ld a4, %lo(.LCPI6_0)(a4) +; RV64IM-NEXT: ld a7, %lo(.LCPI6_2)(a7) ; RV64IM-NEXT: addiw a6, a6, 1327 -; RV64IM-NEXT: mulhu a5, a3, a5 +; RV64IM-NEXT: mulhu a4, a3, a4 ; RV64IM-NEXT: mulhu a7, a1, a7 ; RV64IM-NEXT: srli a7, a7, 12 ; RV64IM-NEXT: mul a6, a7, a6 -; RV64IM-NEXT: sub a7, a3, a5 +; RV64IM-NEXT: 
sub a7, a3, a4 ; RV64IM-NEXT: srli a7, a7, 1 -; RV64IM-NEXT: add a5, a7, a5 -; RV64IM-NEXT: sub a2, a2, a4 +; RV64IM-NEXT: add a4, a7, a4 +; RV64IM-NEXT: sub a2, a2, a5 ; RV64IM-NEXT: sub a1, a1, a6 -; RV64IM-NEXT: li a4, 23 -; RV64IM-NEXT: srli a5, a5, 4 -; RV64IM-NEXT: mul a4, a5, a4 +; RV64IM-NEXT: li a5, 23 +; RV64IM-NEXT: srli a4, a4, 4 +; RV64IM-NEXT: mul a4, a4, a5 ; RV64IM-NEXT: sub a3, a3, a4 ; RV64IM-NEXT: sd zero, 0(a0) ; RV64IM-NEXT: sd a2, 8(a0) diff --git a/llvm/test/CodeGen/RISCV/vararg.ll b/llvm/test/CodeGen/RISCV/vararg.ll index 895d84b38be32..2d6434ebdb434 100644 --- a/llvm/test/CodeGen/RISCV/vararg.ll +++ b/llvm/test/CodeGen/RISCV/vararg.ll @@ -162,16 +162,16 @@ define i32 @va1(ptr %fmt, ...) { ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -80 ; LP64-LP64F-LP64D-FPELIM-NEXT: .cfi_def_cfa_offset 80 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 28 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: lw a0, 24(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 56(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 64(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 72(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 24(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 32(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 40(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 48(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 28 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: lw a0, 24(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 80 ; LP64-LP64F-LP64D-FPELIM-NEXT: .cfi_def_cfa_offset 0 ; LP64-LP64F-LP64D-FPELIM-NEXT: ret @@ -186,16 +186,16 @@ define i32 @va1(ptr %fmt, ...) { ; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_offset s0, -80 ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 32 ; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_def_cfa s0, 64 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 12 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 12 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: .cfi_def_cfa sp, 96 ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload @@ -209,14 +209,14 @@ define i32 @va1(ptr %fmt, ...) { ; LP64E-FPELIM: # %bb.0: ; LP64E-FPELIM-NEXT: addi sp, sp, -56 ; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 56 -; LP64E-FPELIM-NEXT: addi a0, sp, 20 -; LP64E-FPELIM-NEXT: sd a0, 0(sp) -; LP64E-FPELIM-NEXT: sd a1, 16(sp) -; LP64E-FPELIM-NEXT: lw a0, 16(sp) ; LP64E-FPELIM-NEXT: sd a5, 48(sp) +; LP64E-FPELIM-NEXT: sd a1, 16(sp) ; LP64E-FPELIM-NEXT: sd a2, 24(sp) ; LP64E-FPELIM-NEXT: sd a3, 32(sp) ; LP64E-FPELIM-NEXT: sd a4, 40(sp) +; LP64E-FPELIM-NEXT: addi a0, sp, 20 +; LP64E-FPELIM-NEXT: sd a0, 0(sp) +; LP64E-FPELIM-NEXT: lw a0, 16(sp) ; LP64E-FPELIM-NEXT: addi sp, sp, 56 ; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 0 ; LP64E-FPELIM-NEXT: ret @@ -231,14 +231,14 @@ define i32 @va1(ptr %fmt, ...) 
{ ; LP64E-WITHFP-NEXT: .cfi_offset s0, -64 ; LP64E-WITHFP-NEXT: addi s0, sp, 24 ; LP64E-WITHFP-NEXT: .cfi_def_cfa s0, 48 -; LP64E-WITHFP-NEXT: addi a0, s0, 12 -; LP64E-WITHFP-NEXT: sd a0, -24(s0) -; LP64E-WITHFP-NEXT: sd a1, 8(s0) -; LP64E-WITHFP-NEXT: lw a0, 8(s0) ; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) ; LP64E-WITHFP-NEXT: sd a2, 16(s0) ; LP64E-WITHFP-NEXT: sd a3, 24(s0) ; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 12 +; LP64E-WITHFP-NEXT: sd a0, -24(s0) +; LP64E-WITHFP-NEXT: lw a0, 8(s0) ; LP64E-WITHFP-NEXT: .cfi_def_cfa sp, 72 ; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload ; LP64E-WITHFP-NEXT: ld s0, 8(sp) # 8-byte Folded Reload @@ -1348,10 +1348,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; ILP32-ILP32F-FPELIM-NEXT: sw a0, 4(sp) ; ILP32-ILP32F-FPELIM-NEXT: andi a3, a3, -8 ; ILP32-ILP32F-FPELIM-NEXT: sw a4, 4(sp) -; ILP32-ILP32F-FPELIM-NEXT: lw a0, 4(a3) -; ILP32-ILP32F-FPELIM-NEXT: lw a3, 0(a3) -; ILP32-ILP32F-FPELIM-NEXT: add a2, a2, a0 -; ILP32-ILP32F-FPELIM-NEXT: add a0, a1, a3 +; ILP32-ILP32F-FPELIM-NEXT: lw a0, 0(a3) +; ILP32-ILP32F-FPELIM-NEXT: lw a3, 4(a3) +; ILP32-ILP32F-FPELIM-NEXT: add a2, a2, a3 +; ILP32-ILP32F-FPELIM-NEXT: add a0, a1, a0 ; ILP32-ILP32F-FPELIM-NEXT: sltu a1, a0, a1 ; ILP32-ILP32F-FPELIM-NEXT: add a1, a2, a1 ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 32 @@ -1374,10 +1374,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw a0, -12(s0) ; ILP32-ILP32F-WITHFP-NEXT: andi a3, a3, -8 ; ILP32-ILP32F-WITHFP-NEXT: sw a4, -12(s0) -; ILP32-ILP32F-WITHFP-NEXT: lw a0, 4(a3) -; ILP32-ILP32F-WITHFP-NEXT: lw a3, 0(a3) -; ILP32-ILP32F-WITHFP-NEXT: add a2, a2, a0 -; ILP32-ILP32F-WITHFP-NEXT: add a0, a1, a3 +; ILP32-ILP32F-WITHFP-NEXT: lw a0, 0(a3) +; ILP32-ILP32F-WITHFP-NEXT: lw a3, 4(a3) +; ILP32-ILP32F-WITHFP-NEXT: add a2, a2, a3 +; ILP32-ILP32F-WITHFP-NEXT: add a0, a1, a0 ; ILP32-ILP32F-WITHFP-NEXT: sltu a1, a0, a1 ; ILP32-ILP32F-WITHFP-NEXT: add a1, a2, a1 ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 20(sp) # 4-byte Folded Reload @@ -1399,10 +1399,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a0, 4(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: andi a3, a3, -8 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 4(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 4(a3) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a3, 0(a3) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a2, a2, a0 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a3 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 0(a3) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a3, 4(a3) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a2, a2, a3 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a0 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sltu a1, a0, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, a2, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 32 @@ -1420,10 +1420,10 @@ define i64 @va3(i32 %a, i64 %b, ...) 
nounwind { ; ILP32E-FPELIM-NEXT: sw a0, 0(sp) ; ILP32E-FPELIM-NEXT: andi a3, a3, -8 ; ILP32E-FPELIM-NEXT: sw a4, 0(sp) -; ILP32E-FPELIM-NEXT: lw a0, 4(a3) -; ILP32E-FPELIM-NEXT: lw a3, 0(a3) -; ILP32E-FPELIM-NEXT: add a2, a2, a0 -; ILP32E-FPELIM-NEXT: add a0, a1, a3 +; ILP32E-FPELIM-NEXT: lw a0, 0(a3) +; ILP32E-FPELIM-NEXT: lw a3, 4(a3) +; ILP32E-FPELIM-NEXT: add a2, a2, a3 +; ILP32E-FPELIM-NEXT: add a0, a1, a0 ; ILP32E-FPELIM-NEXT: sltu a1, a0, a1 ; ILP32E-FPELIM-NEXT: add a1, a2, a1 ; ILP32E-FPELIM-NEXT: addi sp, sp, 20 @@ -1444,10 +1444,10 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; ILP32E-WITHFP-NEXT: sw a0, -12(s0) ; ILP32E-WITHFP-NEXT: andi a3, a3, -8 ; ILP32E-WITHFP-NEXT: sw a4, -12(s0) -; ILP32E-WITHFP-NEXT: lw a0, 4(a3) -; ILP32E-WITHFP-NEXT: lw a3, 0(a3) -; ILP32E-WITHFP-NEXT: add a2, a2, a0 -; ILP32E-WITHFP-NEXT: add a0, a1, a3 +; ILP32E-WITHFP-NEXT: lw a0, 0(a3) +; ILP32E-WITHFP-NEXT: lw a3, 4(a3) +; ILP32E-WITHFP-NEXT: add a2, a2, a3 +; ILP32E-WITHFP-NEXT: add a0, a1, a0 ; ILP32E-WITHFP-NEXT: sltu a1, a0, a1 ; ILP32E-WITHFP-NEXT: add a1, a2, a1 ; ILP32E-WITHFP-NEXT: lw ra, 8(sp) # 4-byte Folded Reload @@ -1464,9 +1464,9 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, sp, 31 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 31 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, a1, a2 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 64 ; LP64-LP64F-LP64D-FPELIM-NEXT: ret ; @@ -1482,9 +1482,9 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, s0, 15 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 15 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: add a0, a1, a2 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 80 @@ -1497,9 +1497,9 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; LP64E-FPELIM-NEXT: sd a3, 16(sp) ; LP64E-FPELIM-NEXT: sd a4, 24(sp) ; LP64E-FPELIM-NEXT: sd a5, 32(sp) -; LP64E-FPELIM-NEXT: addi a3, sp, 23 +; LP64E-FPELIM-NEXT: addi a0, sp, 23 +; LP64E-FPELIM-NEXT: sd a0, 0(sp) ; LP64E-FPELIM-NEXT: add a0, a1, a2 -; LP64E-FPELIM-NEXT: sd a3, 0(sp) ; LP64E-FPELIM-NEXT: addi sp, sp, 40 ; LP64E-FPELIM-NEXT: ret ; @@ -1513,9 +1513,9 @@ define i64 @va3(i32 %a, i64 %b, ...) nounwind { ; LP64E-WITHFP-NEXT: sd a3, 8(s0) ; LP64E-WITHFP-NEXT: sd a4, 16(s0) ; LP64E-WITHFP-NEXT: sd a5, 24(s0) -; LP64E-WITHFP-NEXT: addi a3, s0, 15 +; LP64E-WITHFP-NEXT: addi a0, s0, 15 +; LP64E-WITHFP-NEXT: sd a0, -24(s0) ; LP64E-WITHFP-NEXT: add a0, a1, a2 -; LP64E-WITHFP-NEXT: sd a3, -24(s0) ; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload ; LP64E-WITHFP-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; LP64E-WITHFP-NEXT: addi sp, sp, 56 @@ -1603,10 +1603,10 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) 
nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 20(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: fld fa5, 0(a0) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: fsd fa5, 8(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 12(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a3, 8(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a2, a2, a0 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a3 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a0, 8(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw a3, 12(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a2, a2, a3 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a0, a1, a0 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sltu a1, a0, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: add a1, a2, a1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 48 @@ -1668,9 +1668,9 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 24(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 32(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 40(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: addi a3, sp, 24 +; LP64-LP64F-LP64D-FPELIM-NEXT: addi a0, sp, 24 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, a1, a2 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 8(sp) ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 64 ; LP64-LP64F-LP64D-FPELIM-NEXT: ret ; @@ -1686,9 +1686,9 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 16(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 24(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a3, s0, 8 +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 8 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: add a0, a1, a2 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, -24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 24(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 16(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: addi sp, sp, 80 @@ -1701,9 +1701,9 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) nounwind { ; LP64E-FPELIM-NEXT: sd a3, 16(sp) ; LP64E-FPELIM-NEXT: sd a4, 24(sp) ; LP64E-FPELIM-NEXT: sd a5, 32(sp) -; LP64E-FPELIM-NEXT: addi a3, sp, 16 +; LP64E-FPELIM-NEXT: addi a0, sp, 16 +; LP64E-FPELIM-NEXT: sd a0, 0(sp) ; LP64E-FPELIM-NEXT: add a0, a1, a2 -; LP64E-FPELIM-NEXT: sd a3, 0(sp) ; LP64E-FPELIM-NEXT: addi sp, sp, 40 ; LP64E-FPELIM-NEXT: ret ; @@ -1717,9 +1717,9 @@ define i64 @va3_va_arg(i32 %a, i64 %b, ...) 
nounwind { ; LP64E-WITHFP-NEXT: sd a3, 8(s0) ; LP64E-WITHFP-NEXT: sd a4, 16(s0) ; LP64E-WITHFP-NEXT: sd a5, 24(s0) -; LP64E-WITHFP-NEXT: addi a3, s0, 8 +; LP64E-WITHFP-NEXT: addi a0, s0, 8 +; LP64E-WITHFP-NEXT: sd a0, -24(s0) ; LP64E-WITHFP-NEXT: add a0, a1, a2 -; LP64E-WITHFP-NEXT: sd a3, -24(s0) ; LP64E-WITHFP-NEXT: ld ra, 16(sp) # 8-byte Folded Reload ; LP64E-WITHFP-NEXT: ld s0, 8(sp) # 8-byte Folded Reload ; LP64E-WITHFP-NEXT: addi sp, sp, 56 @@ -2275,40 +2275,40 @@ define void @va5_aligned_stack_caller() nounwind { ; ILP32-ILP32F-FPELIM: # %bb.0: ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, -64 ; ILP32-ILP32F-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; ILP32-ILP32F-FPELIM-NEXT: li a4, 17 -; ILP32-ILP32F-FPELIM-NEXT: li a5, 16 -; ILP32-ILP32F-FPELIM-NEXT: li a6, 15 -; ILP32-ILP32F-FPELIM-NEXT: lui a7, 262236 -; ILP32-ILP32F-FPELIM-NEXT: lui t0, 377487 -; ILP32-ILP32F-FPELIM-NEXT: li t1, 14 -; ILP32-ILP32F-FPELIM-NEXT: lui t2, 262153 -; ILP32-ILP32F-FPELIM-NEXT: lui t3, 545260 -; ILP32-ILP32F-FPELIM-NEXT: lui t4, 964690 -; ILP32-ILP32F-FPELIM-NEXT: lui t5, 335544 -; ILP32-ILP32F-FPELIM-NEXT: lui t6, 688509 +; ILP32-ILP32F-FPELIM-NEXT: li a3, 17 +; ILP32-ILP32F-FPELIM-NEXT: li a4, 16 +; ILP32-ILP32F-FPELIM-NEXT: li a5, 15 +; ILP32-ILP32F-FPELIM-NEXT: lui a6, 262236 +; ILP32-ILP32F-FPELIM-NEXT: lui a7, 377487 +; ILP32-ILP32F-FPELIM-NEXT: li t0, 14 +; ILP32-ILP32F-FPELIM-NEXT: lui t1, 262153 +; ILP32-ILP32F-FPELIM-NEXT: lui t2, 545260 +; ILP32-ILP32F-FPELIM-NEXT: lui t3, 964690 +; ILP32-ILP32F-FPELIM-NEXT: lui t4, 335544 +; ILP32-ILP32F-FPELIM-NEXT: lui t5, 688509 ; ILP32-ILP32F-FPELIM-NEXT: li a0, 1 ; ILP32-ILP32F-FPELIM-NEXT: li a1, 11 ; ILP32-ILP32F-FPELIM-NEXT: addi a2, sp, 32 +; ILP32-ILP32F-FPELIM-NEXT: sw a4, 20(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a3, 24(sp) ; ILP32-ILP32F-FPELIM-NEXT: li a3, 12 -; ILP32-ILP32F-FPELIM-NEXT: sw a5, 20(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a4, 24(sp) +; ILP32-ILP32F-FPELIM-NEXT: addi a4, a6, 655 +; ILP32-ILP32F-FPELIM-NEXT: addi a6, a7, 1475 +; ILP32-ILP32F-FPELIM-NEXT: sw t0, 0(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a6, 8(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a4, 12(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a5, 16(sp) ; ILP32-ILP32F-FPELIM-NEXT: li a4, 13 -; ILP32-ILP32F-FPELIM-NEXT: addi a5, a7, 655 -; ILP32-ILP32F-FPELIM-NEXT: addi a7, t0, 1475 -; ILP32-ILP32F-FPELIM-NEXT: sw t1, 0(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a7, 8(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a5, 12(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw a6, 16(sp) -; ILP32-ILP32F-FPELIM-NEXT: li a7, 4 -; ILP32-ILP32F-FPELIM-NEXT: addi a5, t2, 491 -; ILP32-ILP32F-FPELIM-NEXT: addi t0, t3, -1967 -; ILP32-ILP32F-FPELIM-NEXT: addi t1, t4, -328 -; ILP32-ILP32F-FPELIM-NEXT: addi t2, t5, 1311 -; ILP32-ILP32F-FPELIM-NEXT: addi a6, t6, -2048 -; ILP32-ILP32F-FPELIM-NEXT: sw t2, 32(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw t1, 36(sp) -; ILP32-ILP32F-FPELIM-NEXT: sw t0, 40(sp) +; ILP32-ILP32F-FPELIM-NEXT: addi a5, t1, 491 +; ILP32-ILP32F-FPELIM-NEXT: addi a7, t2, -1967 +; ILP32-ILP32F-FPELIM-NEXT: addi t0, t3, -328 +; ILP32-ILP32F-FPELIM-NEXT: addi t1, t4, 1311 +; ILP32-ILP32F-FPELIM-NEXT: addi a6, t5, -2048 +; ILP32-ILP32F-FPELIM-NEXT: sw t1, 32(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw t0, 36(sp) +; ILP32-ILP32F-FPELIM-NEXT: sw a7, 40(sp) ; ILP32-ILP32F-FPELIM-NEXT: sw a5, 44(sp) +; ILP32-ILP32F-FPELIM-NEXT: li a7, 4 ; ILP32-ILP32F-FPELIM-NEXT: call va5_aligned_stack_callee ; ILP32-ILP32F-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; ILP32-ILP32F-FPELIM-NEXT: addi sp, sp, 64 @@ -2320,40 +2320,40 @@ define void 
@va5_aligned_stack_caller() nounwind { ; ILP32-ILP32F-WITHFP-NEXT: sw ra, 60(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; ILP32-ILP32F-WITHFP-NEXT: addi s0, sp, 64 -; ILP32-ILP32F-WITHFP-NEXT: li a4, 17 -; ILP32-ILP32F-WITHFP-NEXT: li a5, 16 -; ILP32-ILP32F-WITHFP-NEXT: li a6, 15 -; ILP32-ILP32F-WITHFP-NEXT: lui a7, 262236 -; ILP32-ILP32F-WITHFP-NEXT: lui t0, 377487 -; ILP32-ILP32F-WITHFP-NEXT: li t1, 14 -; ILP32-ILP32F-WITHFP-NEXT: lui t2, 262153 -; ILP32-ILP32F-WITHFP-NEXT: lui t3, 545260 -; ILP32-ILP32F-WITHFP-NEXT: lui t4, 964690 -; ILP32-ILP32F-WITHFP-NEXT: lui t5, 335544 -; ILP32-ILP32F-WITHFP-NEXT: lui t6, 688509 +; ILP32-ILP32F-WITHFP-NEXT: li a3, 17 +; ILP32-ILP32F-WITHFP-NEXT: li a4, 16 +; ILP32-ILP32F-WITHFP-NEXT: li a5, 15 +; ILP32-ILP32F-WITHFP-NEXT: lui a6, 262236 +; ILP32-ILP32F-WITHFP-NEXT: lui a7, 377487 +; ILP32-ILP32F-WITHFP-NEXT: li t0, 14 +; ILP32-ILP32F-WITHFP-NEXT: lui t1, 262153 +; ILP32-ILP32F-WITHFP-NEXT: lui t2, 545260 +; ILP32-ILP32F-WITHFP-NEXT: lui t3, 964690 +; ILP32-ILP32F-WITHFP-NEXT: lui t4, 335544 +; ILP32-ILP32F-WITHFP-NEXT: lui t5, 688509 ; ILP32-ILP32F-WITHFP-NEXT: li a0, 1 ; ILP32-ILP32F-WITHFP-NEXT: li a1, 11 ; ILP32-ILP32F-WITHFP-NEXT: addi a2, s0, -32 +; ILP32-ILP32F-WITHFP-NEXT: sw a4, 20(sp) +; ILP32-ILP32F-WITHFP-NEXT: sw a3, 24(sp) ; ILP32-ILP32F-WITHFP-NEXT: li a3, 12 -; ILP32-ILP32F-WITHFP-NEXT: sw a5, 20(sp) -; ILP32-ILP32F-WITHFP-NEXT: sw a4, 24(sp) +; ILP32-ILP32F-WITHFP-NEXT: addi a4, a6, 655 +; ILP32-ILP32F-WITHFP-NEXT: addi a6, a7, 1475 +; ILP32-ILP32F-WITHFP-NEXT: sw t0, 0(sp) +; ILP32-ILP32F-WITHFP-NEXT: sw a6, 8(sp) +; ILP32-ILP32F-WITHFP-NEXT: sw a4, 12(sp) +; ILP32-ILP32F-WITHFP-NEXT: sw a5, 16(sp) ; ILP32-ILP32F-WITHFP-NEXT: li a4, 13 -; ILP32-ILP32F-WITHFP-NEXT: addi a5, a7, 655 -; ILP32-ILP32F-WITHFP-NEXT: addi a7, t0, 1475 -; ILP32-ILP32F-WITHFP-NEXT: sw t1, 0(sp) -; ILP32-ILP32F-WITHFP-NEXT: sw a7, 8(sp) -; ILP32-ILP32F-WITHFP-NEXT: sw a5, 12(sp) -; ILP32-ILP32F-WITHFP-NEXT: sw a6, 16(sp) -; ILP32-ILP32F-WITHFP-NEXT: li a7, 4 -; ILP32-ILP32F-WITHFP-NEXT: addi a5, t2, 491 -; ILP32-ILP32F-WITHFP-NEXT: addi t0, t3, -1967 -; ILP32-ILP32F-WITHFP-NEXT: addi t1, t4, -328 -; ILP32-ILP32F-WITHFP-NEXT: addi t2, t5, 1311 -; ILP32-ILP32F-WITHFP-NEXT: addi a6, t6, -2048 -; ILP32-ILP32F-WITHFP-NEXT: sw t2, -32(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw t1, -28(s0) -; ILP32-ILP32F-WITHFP-NEXT: sw t0, -24(s0) +; ILP32-ILP32F-WITHFP-NEXT: addi a5, t1, 491 +; ILP32-ILP32F-WITHFP-NEXT: addi a7, t2, -1967 +; ILP32-ILP32F-WITHFP-NEXT: addi t0, t3, -328 +; ILP32-ILP32F-WITHFP-NEXT: addi t1, t4, 1311 +; ILP32-ILP32F-WITHFP-NEXT: addi a6, t5, -2048 +; ILP32-ILP32F-WITHFP-NEXT: sw t1, -32(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw t0, -28(s0) +; ILP32-ILP32F-WITHFP-NEXT: sw a7, -24(s0) ; ILP32-ILP32F-WITHFP-NEXT: sw a5, -20(s0) +; ILP32-ILP32F-WITHFP-NEXT: li a7, 4 ; ILP32-ILP32F-WITHFP-NEXT: call va5_aligned_stack_callee ; ILP32-ILP32F-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; ILP32-ILP32F-WITHFP-NEXT: lw s0, 56(sp) # 4-byte Folded Reload @@ -2364,40 +2364,40 @@ define void @va5_aligned_stack_caller() nounwind { ; RV32D-ILP32-ILP32F-ILP32D-FPELIM: # %bb.0: ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, -64 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw ra, 60(sp) # 4-byte Folded Spill -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a5, 262236 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a6, 377487 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a4, 17 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a7, 16 -; 
RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li t0, 15 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li t1, 14 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui t2, 262153 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui t3, 545260 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui t4, 964690 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui t5, 335544 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui t6, 688509 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a4, 262236 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui a5, 377487 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a3, 17 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a6, 16 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a7, 15 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li t0, 14 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui t1, 262153 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui t2, 545260 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui t3, 964690 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui t4, 335544 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lui t5, 688509 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a0, 1 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a1, 11 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a2, sp, 32 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 20(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a3, 24(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a3, 12 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 20(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 24(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a4, a4, 655 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a5, a5, 1475 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t0, 0(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 8(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a4, 12(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 16(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a4, 13 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a5, a5, 655 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a6, a6, 1475 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t1, 0(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a6, 8(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 12(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t0, 16(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a7, 4 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a5, t2, 491 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi t0, t3, -1967 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi t1, t4, -328 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi t2, t5, 1311 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a6, t6, -2048 -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t2, 32(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t1, 36(sp) -; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t0, 40(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a5, t1, 491 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a7, t2, -1967 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi t0, t3, -328 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi t1, t4, 1311 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi a6, t5, -2048 +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t1, 32(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw t0, 36(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a7, 40(sp) ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: sw a5, 44(sp) +; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: li a7, 4 ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: call va5_aligned_stack_callee ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload ; RV32D-ILP32-ILP32F-ILP32D-FPELIM-NEXT: addi sp, sp, 64 @@ -2410,41 +2410,41 @@ define void @va5_aligned_stack_caller() nounwind { ; 
ILP32E-FPELIM-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; ILP32E-FPELIM-NEXT: addi s0, sp, 64 ; ILP32E-FPELIM-NEXT: andi sp, sp, -16 -; ILP32E-FPELIM-NEXT: li a3, 17 -; ILP32E-FPELIM-NEXT: li a4, 16 -; ILP32E-FPELIM-NEXT: li a5, 15 -; ILP32E-FPELIM-NEXT: lui a6, 262236 -; ILP32E-FPELIM-NEXT: lui a7, 377487 -; ILP32E-FPELIM-NEXT: li t0, 14 -; ILP32E-FPELIM-NEXT: li t1, 4 -; ILP32E-FPELIM-NEXT: lui t2, 262153 -; ILP32E-FPELIM-NEXT: lui t3, 545260 -; ILP32E-FPELIM-NEXT: lui t4, 964690 -; ILP32E-FPELIM-NEXT: lui t5, 335544 -; ILP32E-FPELIM-NEXT: lui t6, 688509 +; ILP32E-FPELIM-NEXT: li a2, 17 +; ILP32E-FPELIM-NEXT: li a3, 16 +; ILP32E-FPELIM-NEXT: li a4, 15 +; ILP32E-FPELIM-NEXT: lui a5, 262236 +; ILP32E-FPELIM-NEXT: lui a6, 377487 +; ILP32E-FPELIM-NEXT: li a7, 14 +; ILP32E-FPELIM-NEXT: li t0, 4 +; ILP32E-FPELIM-NEXT: lui t1, 262153 +; ILP32E-FPELIM-NEXT: lui t2, 545260 +; ILP32E-FPELIM-NEXT: lui t3, 964690 +; ILP32E-FPELIM-NEXT: lui t4, 335544 +; ILP32E-FPELIM-NEXT: lui t5, 688509 ; ILP32E-FPELIM-NEXT: li a0, 1 ; ILP32E-FPELIM-NEXT: li a1, 11 +; ILP32E-FPELIM-NEXT: sw a4, 16(sp) +; ILP32E-FPELIM-NEXT: sw a3, 20(sp) +; ILP32E-FPELIM-NEXT: sw a2, 24(sp) ; ILP32E-FPELIM-NEXT: addi a2, sp, 32 -; ILP32E-FPELIM-NEXT: sw a5, 16(sp) -; ILP32E-FPELIM-NEXT: sw a4, 20(sp) -; ILP32E-FPELIM-NEXT: sw a3, 24(sp) +; ILP32E-FPELIM-NEXT: addi a3, a5, 655 +; ILP32E-FPELIM-NEXT: addi a4, a6, 1475 +; ILP32E-FPELIM-NEXT: sw t0, 0(sp) +; ILP32E-FPELIM-NEXT: sw a7, 4(sp) +; ILP32E-FPELIM-NEXT: sw a4, 8(sp) +; ILP32E-FPELIM-NEXT: sw a3, 12(sp) ; ILP32E-FPELIM-NEXT: li a3, 12 -; ILP32E-FPELIM-NEXT: addi a4, a6, 655 -; ILP32E-FPELIM-NEXT: addi a5, a7, 1475 -; ILP32E-FPELIM-NEXT: sw t1, 0(sp) -; ILP32E-FPELIM-NEXT: sw t0, 4(sp) -; ILP32E-FPELIM-NEXT: sw a5, 8(sp) -; ILP32E-FPELIM-NEXT: sw a4, 12(sp) +; ILP32E-FPELIM-NEXT: addi a4, t1, 491 +; ILP32E-FPELIM-NEXT: addi a6, t2, -1967 +; ILP32E-FPELIM-NEXT: addi a7, t3, -328 +; ILP32E-FPELIM-NEXT: addi t0, t4, 1311 +; ILP32E-FPELIM-NEXT: addi a5, t5, -2048 +; ILP32E-FPELIM-NEXT: sw t0, 32(sp) +; ILP32E-FPELIM-NEXT: sw a7, 36(sp) +; ILP32E-FPELIM-NEXT: sw a6, 40(sp) +; ILP32E-FPELIM-NEXT: sw a4, 44(sp) ; ILP32E-FPELIM-NEXT: li a4, 13 -; ILP32E-FPELIM-NEXT: addi a6, t2, 491 -; ILP32E-FPELIM-NEXT: addi a7, t3, -1967 -; ILP32E-FPELIM-NEXT: addi t0, t4, -328 -; ILP32E-FPELIM-NEXT: addi t1, t5, 1311 -; ILP32E-FPELIM-NEXT: addi a5, t6, -2048 -; ILP32E-FPELIM-NEXT: sw t1, 32(sp) -; ILP32E-FPELIM-NEXT: sw t0, 36(sp) -; ILP32E-FPELIM-NEXT: sw a7, 40(sp) -; ILP32E-FPELIM-NEXT: sw a6, 44(sp) ; ILP32E-FPELIM-NEXT: call va5_aligned_stack_callee ; ILP32E-FPELIM-NEXT: addi sp, s0, -64 ; ILP32E-FPELIM-NEXT: lw ra, 60(sp) # 4-byte Folded Reload @@ -2459,41 +2459,41 @@ define void @va5_aligned_stack_caller() nounwind { ; ILP32E-WITHFP-NEXT: sw s0, 56(sp) # 4-byte Folded Spill ; ILP32E-WITHFP-NEXT: addi s0, sp, 64 ; ILP32E-WITHFP-NEXT: andi sp, sp, -16 -; ILP32E-WITHFP-NEXT: li a3, 17 -; ILP32E-WITHFP-NEXT: li a4, 16 -; ILP32E-WITHFP-NEXT: li a5, 15 -; ILP32E-WITHFP-NEXT: lui a6, 262236 -; ILP32E-WITHFP-NEXT: lui a7, 377487 -; ILP32E-WITHFP-NEXT: li t0, 14 -; ILP32E-WITHFP-NEXT: li t1, 4 -; ILP32E-WITHFP-NEXT: lui t2, 262153 -; ILP32E-WITHFP-NEXT: lui t3, 545260 -; ILP32E-WITHFP-NEXT: lui t4, 964690 -; ILP32E-WITHFP-NEXT: lui t5, 335544 -; ILP32E-WITHFP-NEXT: lui t6, 688509 +; ILP32E-WITHFP-NEXT: li a2, 17 +; ILP32E-WITHFP-NEXT: li a3, 16 +; ILP32E-WITHFP-NEXT: li a4, 15 +; ILP32E-WITHFP-NEXT: lui a5, 262236 +; ILP32E-WITHFP-NEXT: lui a6, 377487 +; ILP32E-WITHFP-NEXT: li a7, 14 +; 
ILP32E-WITHFP-NEXT: li t0, 4 +; ILP32E-WITHFP-NEXT: lui t1, 262153 +; ILP32E-WITHFP-NEXT: lui t2, 545260 +; ILP32E-WITHFP-NEXT: lui t3, 964690 +; ILP32E-WITHFP-NEXT: lui t4, 335544 +; ILP32E-WITHFP-NEXT: lui t5, 688509 ; ILP32E-WITHFP-NEXT: li a0, 1 ; ILP32E-WITHFP-NEXT: li a1, 11 +; ILP32E-WITHFP-NEXT: sw a4, 16(sp) +; ILP32E-WITHFP-NEXT: sw a3, 20(sp) +; ILP32E-WITHFP-NEXT: sw a2, 24(sp) ; ILP32E-WITHFP-NEXT: addi a2, sp, 32 -; ILP32E-WITHFP-NEXT: sw a5, 16(sp) -; ILP32E-WITHFP-NEXT: sw a4, 20(sp) -; ILP32E-WITHFP-NEXT: sw a3, 24(sp) +; ILP32E-WITHFP-NEXT: addi a3, a5, 655 +; ILP32E-WITHFP-NEXT: addi a4, a6, 1475 +; ILP32E-WITHFP-NEXT: sw t0, 0(sp) +; ILP32E-WITHFP-NEXT: sw a7, 4(sp) +; ILP32E-WITHFP-NEXT: sw a4, 8(sp) +; ILP32E-WITHFP-NEXT: sw a3, 12(sp) ; ILP32E-WITHFP-NEXT: li a3, 12 -; ILP32E-WITHFP-NEXT: addi a4, a6, 655 -; ILP32E-WITHFP-NEXT: addi a5, a7, 1475 -; ILP32E-WITHFP-NEXT: sw t1, 0(sp) -; ILP32E-WITHFP-NEXT: sw t0, 4(sp) -; ILP32E-WITHFP-NEXT: sw a5, 8(sp) -; ILP32E-WITHFP-NEXT: sw a4, 12(sp) +; ILP32E-WITHFP-NEXT: addi a4, t1, 491 +; ILP32E-WITHFP-NEXT: addi a6, t2, -1967 +; ILP32E-WITHFP-NEXT: addi a7, t3, -328 +; ILP32E-WITHFP-NEXT: addi t0, t4, 1311 +; ILP32E-WITHFP-NEXT: addi a5, t5, -2048 +; ILP32E-WITHFP-NEXT: sw t0, 32(sp) +; ILP32E-WITHFP-NEXT: sw a7, 36(sp) +; ILP32E-WITHFP-NEXT: sw a6, 40(sp) +; ILP32E-WITHFP-NEXT: sw a4, 44(sp) ; ILP32E-WITHFP-NEXT: li a4, 13 -; ILP32E-WITHFP-NEXT: addi a6, t2, 491 -; ILP32E-WITHFP-NEXT: addi a7, t3, -1967 -; ILP32E-WITHFP-NEXT: addi t0, t4, -328 -; ILP32E-WITHFP-NEXT: addi t1, t5, 1311 -; ILP32E-WITHFP-NEXT: addi a5, t6, -2048 -; ILP32E-WITHFP-NEXT: sw t1, 32(sp) -; ILP32E-WITHFP-NEXT: sw t0, 36(sp) -; ILP32E-WITHFP-NEXT: sw a7, 40(sp) -; ILP32E-WITHFP-NEXT: sw a6, 44(sp) ; ILP32E-WITHFP-NEXT: call va5_aligned_stack_callee ; ILP32E-WITHFP-NEXT: addi sp, s0, -64 ; ILP32E-WITHFP-NEXT: lw ra, 60(sp) # 4-byte Folded Reload @@ -2505,27 +2505,27 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64-LP64F-LP64D-FPELIM: # %bb.0: ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, -48 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd ra, 40(sp) # 8-byte Folded Spill -; LP64-LP64F-LP64D-FPELIM-NEXT: li t0, 17 -; LP64-LP64F-LP64D-FPELIM-NEXT: li t1, 16 -; LP64-LP64F-LP64D-FPELIM-NEXT: li t2, 15 +; LP64-LP64F-LP64D-FPELIM-NEXT: li a7, 17 +; LP64-LP64F-LP64D-FPELIM-NEXT: li t0, 16 +; LP64-LP64F-LP64D-FPELIM-NEXT: li t1, 15 ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a2, %hi(.LCPI11_0) ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a3, %hi(.LCPI11_1) ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a6, %hi(.LCPI11_2) -; LP64-LP64F-LP64D-FPELIM-NEXT: lui t3, 2384 +; LP64-LP64F-LP64D-FPELIM-NEXT: lui t2, 2384 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a0, 1 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a1, 11 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a4, 12 ; LP64-LP64F-LP64D-FPELIM-NEXT: li a5, 13 -; LP64-LP64F-LP64D-FPELIM-NEXT: li a7, 14 -; LP64-LP64F-LP64D-FPELIM-NEXT: ld t4, %lo(.LCPI11_0)(a2) +; LP64-LP64F-LP64D-FPELIM-NEXT: ld t3, %lo(.LCPI11_0)(a2) ; LP64-LP64F-LP64D-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(a3) ; LP64-LP64F-LP64D-FPELIM-NEXT: ld a3, %lo(.LCPI11_2)(a6) -; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a6, t3, 761 +; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a6, t2, 761 ; LP64-LP64F-LP64D-FPELIM-NEXT: slli a6, a6, 11 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd t4, 0(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd t2, 8(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd t1, 16(sp) -; LP64-LP64F-LP64D-FPELIM-NEXT: sd t0, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd t3, 0(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd t1, 8(sp) +; 
LP64-LP64F-LP64D-FPELIM-NEXT: sd t0, 16(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 24(sp) +; LP64-LP64F-LP64D-FPELIM-NEXT: li a7, 14 ; LP64-LP64F-LP64D-FPELIM-NEXT: call va5_aligned_stack_callee ; LP64-LP64F-LP64D-FPELIM-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-FPELIM-NEXT: addi sp, sp, 48 @@ -2537,27 +2537,27 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64-LP64F-LP64D-WITHFP-NEXT: sd ra, 40(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: sd s0, 32(sp) # 8-byte Folded Spill ; LP64-LP64F-LP64D-WITHFP-NEXT: addi s0, sp, 48 -; LP64-LP64F-LP64D-WITHFP-NEXT: li t0, 17 -; LP64-LP64F-LP64D-WITHFP-NEXT: li t1, 16 -; LP64-LP64F-LP64D-WITHFP-NEXT: li t2, 15 +; LP64-LP64F-LP64D-WITHFP-NEXT: li a7, 17 +; LP64-LP64F-LP64D-WITHFP-NEXT: li t0, 16 +; LP64-LP64F-LP64D-WITHFP-NEXT: li t1, 15 ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a2, %hi(.LCPI11_0) ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a3, %hi(.LCPI11_1) ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a6, %hi(.LCPI11_2) -; LP64-LP64F-LP64D-WITHFP-NEXT: lui t3, 2384 +; LP64-LP64F-LP64D-WITHFP-NEXT: lui t2, 2384 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a0, 1 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a1, 11 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a4, 12 ; LP64-LP64F-LP64D-WITHFP-NEXT: li a5, 13 -; LP64-LP64F-LP64D-WITHFP-NEXT: li a7, 14 -; LP64-LP64F-LP64D-WITHFP-NEXT: ld t4, %lo(.LCPI11_0)(a2) +; LP64-LP64F-LP64D-WITHFP-NEXT: ld t3, %lo(.LCPI11_0)(a2) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(a3) ; LP64-LP64F-LP64D-WITHFP-NEXT: ld a3, %lo(.LCPI11_2)(a6) -; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a6, t3, 761 +; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a6, t2, 761 ; LP64-LP64F-LP64D-WITHFP-NEXT: slli a6, a6, 11 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd t4, 0(sp) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd t2, 8(sp) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd t1, 16(sp) -; LP64-LP64F-LP64D-WITHFP-NEXT: sd t0, 24(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd t3, 0(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd t1, 8(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd t0, 16(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 24(sp) +; LP64-LP64F-LP64D-WITHFP-NEXT: li a7, 14 ; LP64-LP64F-LP64D-WITHFP-NEXT: call va5_aligned_stack_callee ; LP64-LP64F-LP64D-WITHFP-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; LP64-LP64F-LP64D-WITHFP-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -2570,27 +2570,27 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64E-FPELIM-NEXT: sd ra, 48(sp) # 8-byte Folded Spill ; LP64E-FPELIM-NEXT: li a2, 17 ; LP64E-FPELIM-NEXT: li a3, 16 -; LP64E-FPELIM-NEXT: li a6, 15 -; LP64E-FPELIM-NEXT: lui a7, %hi(.LCPI11_0) -; LP64E-FPELIM-NEXT: li t0, 14 -; LP64E-FPELIM-NEXT: lui t1, 2384 -; LP64E-FPELIM-NEXT: lui t2, %hi(.LCPI11_1) -; LP64E-FPELIM-NEXT: lui t3, %hi(.LCPI11_2) +; LP64E-FPELIM-NEXT: li a5, 15 +; LP64E-FPELIM-NEXT: lui a6, %hi(.LCPI11_0) +; LP64E-FPELIM-NEXT: li a7, 14 +; LP64E-FPELIM-NEXT: lui t0, 2384 +; LP64E-FPELIM-NEXT: lui t1, %hi(.LCPI11_1) +; LP64E-FPELIM-NEXT: lui t2, %hi(.LCPI11_2) ; LP64E-FPELIM-NEXT: li a0, 1 ; LP64E-FPELIM-NEXT: li a1, 11 -; LP64E-FPELIM-NEXT: li a4, 12 ; LP64E-FPELIM-NEXT: sd a3, 32(sp) ; LP64E-FPELIM-NEXT: sd a2, 40(sp) +; LP64E-FPELIM-NEXT: li a4, 12 +; LP64E-FPELIM-NEXT: ld a6, %lo(.LCPI11_0)(a6) +; LP64E-FPELIM-NEXT: addiw t0, t0, 761 +; LP64E-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(t1) +; LP64E-FPELIM-NEXT: ld a3, %lo(.LCPI11_2)(t2) +; LP64E-FPELIM-NEXT: slli t0, t0, 11 +; LP64E-FPELIM-NEXT: sd t0, 0(sp) +; LP64E-FPELIM-NEXT: sd a7, 8(sp) +; LP64E-FPELIM-NEXT: sd a6, 16(sp) +; LP64E-FPELIM-NEXT: sd a5, 24(sp) ; LP64E-FPELIM-NEXT: li a5, 13 -; 
LP64E-FPELIM-NEXT: ld a7, %lo(.LCPI11_0)(a7) -; LP64E-FPELIM-NEXT: addiw t1, t1, 761 -; LP64E-FPELIM-NEXT: ld a2, %lo(.LCPI11_1)(t2) -; LP64E-FPELIM-NEXT: ld a3, %lo(.LCPI11_2)(t3) -; LP64E-FPELIM-NEXT: slli t1, t1, 11 -; LP64E-FPELIM-NEXT: sd t1, 0(sp) -; LP64E-FPELIM-NEXT: sd t0, 8(sp) -; LP64E-FPELIM-NEXT: sd a7, 16(sp) -; LP64E-FPELIM-NEXT: sd a6, 24(sp) ; LP64E-FPELIM-NEXT: call va5_aligned_stack_callee ; LP64E-FPELIM-NEXT: ld ra, 48(sp) # 8-byte Folded Reload ; LP64E-FPELIM-NEXT: addi sp, sp, 56 @@ -2604,27 +2604,27 @@ define void @va5_aligned_stack_caller() nounwind { ; LP64E-WITHFP-NEXT: addi s0, sp, 64 ; LP64E-WITHFP-NEXT: li a2, 17 ; LP64E-WITHFP-NEXT: li a3, 16 -; LP64E-WITHFP-NEXT: li a6, 15 -; LP64E-WITHFP-NEXT: lui a7, %hi(.LCPI11_0) -; LP64E-WITHFP-NEXT: li t0, 14 -; LP64E-WITHFP-NEXT: lui t1, 2384 -; LP64E-WITHFP-NEXT: lui t2, %hi(.LCPI11_1) -; LP64E-WITHFP-NEXT: lui t3, %hi(.LCPI11_2) +; LP64E-WITHFP-NEXT: li a5, 15 +; LP64E-WITHFP-NEXT: lui a6, %hi(.LCPI11_0) +; LP64E-WITHFP-NEXT: li a7, 14 +; LP64E-WITHFP-NEXT: lui t0, 2384 +; LP64E-WITHFP-NEXT: lui t1, %hi(.LCPI11_1) +; LP64E-WITHFP-NEXT: lui t2, %hi(.LCPI11_2) ; LP64E-WITHFP-NEXT: li a0, 1 ; LP64E-WITHFP-NEXT: li a1, 11 -; LP64E-WITHFP-NEXT: li a4, 12 ; LP64E-WITHFP-NEXT: sd a3, 32(sp) ; LP64E-WITHFP-NEXT: sd a2, 40(sp) +; LP64E-WITHFP-NEXT: li a4, 12 +; LP64E-WITHFP-NEXT: ld a6, %lo(.LCPI11_0)(a6) +; LP64E-WITHFP-NEXT: addiw t0, t0, 761 +; LP64E-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(t1) +; LP64E-WITHFP-NEXT: ld a3, %lo(.LCPI11_2)(t2) +; LP64E-WITHFP-NEXT: slli t0, t0, 11 +; LP64E-WITHFP-NEXT: sd t0, 0(sp) +; LP64E-WITHFP-NEXT: sd a7, 8(sp) +; LP64E-WITHFP-NEXT: sd a6, 16(sp) +; LP64E-WITHFP-NEXT: sd a5, 24(sp) ; LP64E-WITHFP-NEXT: li a5, 13 -; LP64E-WITHFP-NEXT: ld a7, %lo(.LCPI11_0)(a7) -; LP64E-WITHFP-NEXT: addiw t1, t1, 761 -; LP64E-WITHFP-NEXT: ld a2, %lo(.LCPI11_1)(t2) -; LP64E-WITHFP-NEXT: ld a3, %lo(.LCPI11_2)(t3) -; LP64E-WITHFP-NEXT: slli t1, t1, 11 -; LP64E-WITHFP-NEXT: sd t1, 0(sp) -; LP64E-WITHFP-NEXT: sd t0, 8(sp) -; LP64E-WITHFP-NEXT: sd a7, 16(sp) -; LP64E-WITHFP-NEXT: sd a6, 24(sp) ; LP64E-WITHFP-NEXT: call va5_aligned_stack_callee ; LP64E-WITHFP-NEXT: ld ra, 56(sp) # 8-byte Folded Reload ; LP64E-WITHFP-NEXT: ld s0, 48(sp) # 8-byte Folded Reload @@ -2994,8 +2994,26 @@ define i32 @va_large_stack(ptr %fmt, ...) 
{ ; LP64-LP64F-LP64D-FPELIM-NEXT: .cfi_def_cfa_offset 100000080 ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 ; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 312(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 320(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 328(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a1, 280(a0) ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 288(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 296(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 +; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 +; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 304(a0) +; LP64-LP64F-LP64D-FPELIM-NEXT: lui a0, 24414 ; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a0, a0, 284 ; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 ; LP64-LP64F-LP64D-FPELIM-NEXT: sd a0, 8(sp) @@ -3003,24 +3021,6 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; LP64-LP64F-LP64D-FPELIM-NEXT: add a0, sp, a0 ; LP64-LP64F-LP64D-FPELIM-NEXT: lw a0, 280(a0) ; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 -; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a5, 312(a1) -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 -; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a6, 320(a1) -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 -; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a7, 328(a1) -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 -; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a2, 288(a1) -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 -; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a3, 296(a1) -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 -; LP64-LP64F-LP64D-FPELIM-NEXT: add a1, sp, a1 -; LP64-LP64F-LP64D-FPELIM-NEXT: sd a4, 304(a1) -; LP64-LP64F-LP64D-FPELIM-NEXT: lui a1, 24414 ; LP64-LP64F-LP64D-FPELIM-NEXT: addiw a1, a1, 336 ; LP64-LP64F-LP64D-FPELIM-NEXT: add sp, sp, a1 ; LP64-LP64F-LP64D-FPELIM-NEXT: .cfi_def_cfa_offset 0 @@ -3039,18 +3039,18 @@ define i32 @va_large_stack(ptr %fmt, ...) 
{ ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a0, 24414 ; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a0, a0, -1680 ; LP64-LP64F-LP64D-WITHFP-NEXT: sub sp, sp, a0 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) -; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 12 -; LP64-LP64F-LP64D-WITHFP-NEXT: lui a1, 24414 -; LP64-LP64F-LP64D-WITHFP-NEXT: sub a1, s0, a1 -; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -288(a1) -; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a5, 40(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a6, 48(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a7, 56(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a1, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a2, 16(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a3, 24(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: sd a4, 32(s0) +; LP64-LP64F-LP64D-WITHFP-NEXT: addi a0, s0, 12 +; LP64-LP64F-LP64D-WITHFP-NEXT: lui a1, 24414 +; LP64-LP64F-LP64D-WITHFP-NEXT: sub a1, s0, a1 +; LP64-LP64F-LP64D-WITHFP-NEXT: sd a0, -288(a1) +; LP64-LP64F-LP64D-WITHFP-NEXT: lw a0, 8(s0) ; LP64-LP64F-LP64D-WITHFP-NEXT: lui a1, 24414 ; LP64-LP64F-LP64D-WITHFP-NEXT: addiw a1, a1, -1680 ; LP64-LP64F-LP64D-WITHFP-NEXT: add sp, sp, a1 @@ -3070,28 +3070,28 @@ define i32 @va_large_stack(ptr %fmt, ...) { ; LP64E-FPELIM-NEXT: sub sp, sp, a0 ; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 100000064 ; LP64E-FPELIM-NEXT: lui a0, 24414 -; LP64E-FPELIM-NEXT: addiw a0, a0, 284 ; LP64E-FPELIM-NEXT: add a0, sp, a0 -; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: sd a5, 312(a0) ; LP64E-FPELIM-NEXT: lui a0, 24414 ; LP64E-FPELIM-NEXT: add a0, sp, a0 ; LP64E-FPELIM-NEXT: sd a1, 280(a0) ; LP64E-FPELIM-NEXT: lui a0, 24414 ; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a2, 288(a0) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a3, 296(a0) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a4, 304(a0) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: addiw a0, a0, 284 +; LP64E-FPELIM-NEXT: add a0, sp, a0 +; LP64E-FPELIM-NEXT: sd a0, 8(sp) +; LP64E-FPELIM-NEXT: lui a0, 24414 +; LP64E-FPELIM-NEXT: add a0, sp, a0 ; LP64E-FPELIM-NEXT: lw a0, 280(a0) ; LP64E-FPELIM-NEXT: lui a1, 24414 -; LP64E-FPELIM-NEXT: add a1, sp, a1 -; LP64E-FPELIM-NEXT: sd a5, 312(a1) -; LP64E-FPELIM-NEXT: lui a1, 24414 -; LP64E-FPELIM-NEXT: add a1, sp, a1 -; LP64E-FPELIM-NEXT: sd a2, 288(a1) -; LP64E-FPELIM-NEXT: lui a1, 24414 -; LP64E-FPELIM-NEXT: add a1, sp, a1 -; LP64E-FPELIM-NEXT: sd a3, 296(a1) -; LP64E-FPELIM-NEXT: lui a1, 24414 -; LP64E-FPELIM-NEXT: add a1, sp, a1 -; LP64E-FPELIM-NEXT: sd a4, 304(a1) -; LP64E-FPELIM-NEXT: lui a1, 24414 ; LP64E-FPELIM-NEXT: addiw a1, a1, 320 ; LP64E-FPELIM-NEXT: add sp, sp, a1 ; LP64E-FPELIM-NEXT: .cfi_def_cfa_offset 0 @@ -3110,16 +3110,16 @@ define i32 @va_large_stack(ptr %fmt, ...) 
{ ; LP64E-WITHFP-NEXT: lui a0, 24414 ; LP64E-WITHFP-NEXT: addiw a0, a0, -1704 ; LP64E-WITHFP-NEXT: sub sp, sp, a0 -; LP64E-WITHFP-NEXT: addi a0, s0, 12 -; LP64E-WITHFP-NEXT: lui a6, 24414 -; LP64E-WITHFP-NEXT: sub a6, s0, a6 -; LP64E-WITHFP-NEXT: sd a0, -288(a6) -; LP64E-WITHFP-NEXT: sd a1, 8(s0) -; LP64E-WITHFP-NEXT: lw a0, 8(s0) ; LP64E-WITHFP-NEXT: sd a5, 40(s0) +; LP64E-WITHFP-NEXT: sd a1, 8(s0) ; LP64E-WITHFP-NEXT: sd a2, 16(s0) ; LP64E-WITHFP-NEXT: sd a3, 24(s0) ; LP64E-WITHFP-NEXT: sd a4, 32(s0) +; LP64E-WITHFP-NEXT: addi a0, s0, 12 +; LP64E-WITHFP-NEXT: lui a1, 24414 +; LP64E-WITHFP-NEXT: sub a1, s0, a1 +; LP64E-WITHFP-NEXT: sd a0, -288(a1) +; LP64E-WITHFP-NEXT: lw a0, 8(s0) ; LP64E-WITHFP-NEXT: lui a1, 24414 ; LP64E-WITHFP-NEXT: addiw a1, a1, -1704 ; LP64E-WITHFP-NEXT: add sp, sp, a1 diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll index 437b7e557718c..13beb844dec36 100644 --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-by-byte-multiple-legalization.ll @@ -9,9 +9,9 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) ; RV64I-NEXT: lb a0, 3(a0) -; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a0, a0, 24 ; RV64I-NEXT: slli a1, a1, 3 @@ -29,26 +29,26 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: lshr_4bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 1(a0) -; RV32I-NEXT: lbu a4, 2(a0) -; RV32I-NEXT: lbu a5, 3(a0) -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli a5, a5, 24 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: lbu a3, 0(a1) -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: srl a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 16 @@ -73,9 +73,9 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) ; RV64I-NEXT: lb a0, 3(a0) -; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a0, a0, 24 ; RV64I-NEXT: slli a1, a1, 3 @@ -93,26 +93,26 @@ define void @shl_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: shl_4bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 1(a0) -; RV32I-NEXT: lbu a4, 2(a0) -; RV32I-NEXT: lbu a5, 3(a0) -; RV32I-NEXT: lbu a0, 0(a0) -; 
RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli a5, a5, 24 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: lbu a3, 0(a1) -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: sll a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 16 @@ -137,9 +137,9 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) ; RV64I-NEXT: lb a0, 3(a0) -; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a4, a4, 8 ; RV64I-NEXT: or a3, a4, a3 +; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a0, a0, 24 ; RV64I-NEXT: slli a1, a1, 3 @@ -157,26 +157,26 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: ashr_4bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 1(a0) -; RV32I-NEXT: lbu a4, 2(a0) -; RV32I-NEXT: lbu a5, 3(a0) -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli a5, a5, 24 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: lbu a3, 0(a1) -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: slli a1, a1, 3 ; RV32I-NEXT: sra a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 16 @@ -224,20 +224,20 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t2, t2, 24 ; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: or a7, t2, t0 +; RV64I-NEXT: lbu t0, 0(a1) ; RV64I-NEXT: lbu t1, 1(a1) -; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: lbu t2, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) ; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or a7, t1, a7 +; RV64I-NEXT: or t0, t1, t0 ; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t2 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: or a4, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a4, a7, a6 +; RV64I-NEXT: or a1, a1, t0 
; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a4, a4, 35 @@ -263,40 +263,40 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: lshr_8bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 5(a0) -; RV32I-NEXT: lbu a4, 6(a0) -; RV32I-NEXT: lbu a5, 7(a0) -; RV32I-NEXT: lbu a6, 4(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli a5, a5, 24 -; RV32I-NEXT: or a3, a3, a6 -; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: lbu a3, 4(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 6(a0) +; RV32I-NEXT: lbu a6, 7(a0) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a6, 1(a1) +; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a4, a6, a4 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: or a5, a4, a3 -; RV32I-NEXT: or a4, a1, a6 -; RV32I-NEXT: slli a4, a4, 3 -; RV32I-NEXT: addi a3, a4, -32 +; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli a4, a1, 3 ; RV32I-NEXT: srl a1, a5, a4 +; RV32I-NEXT: addi a3, a4, -32 ; RV32I-NEXT: bltz a3, .LBB3_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: j .LBB3_3 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: lbu a6, 1(a0) -; RV32I-NEXT: lbu a7, 0(a0) +; RV32I-NEXT: lbu a6, 0(a0) +; RV32I-NEXT: lbu a7, 1(a0) ; RV32I-NEXT: lbu t0, 2(a0) ; RV32I-NEXT: lbu a0, 3(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 ; RV32I-NEXT: slli a5, a5, 1 ; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a0, a0, 24 @@ -360,20 +360,20 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t2, t2, 24 ; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: or a7, t2, t0 +; RV64I-NEXT: lbu t0, 0(a1) ; RV64I-NEXT: lbu t1, 1(a1) -; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: lbu t2, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) ; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or a7, t1, a7 +; RV64I-NEXT: or t0, t1, t0 ; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t2 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: or a4, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a4, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a4, a4, 35 @@ -399,40 +399,40 @@ define void @shl_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: shl_8bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 1(a0) -; RV32I-NEXT: lbu a4, 2(a0) -; RV32I-NEXT: lbu a5, 3(a0) -; RV32I-NEXT: lbu a6, 0(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli a5, a5, 24 -; RV32I-NEXT: or a3, a3, a6 -; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: slli 
a4, a4, 8 ; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a6, 1(a1) +; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a4, a6, a4 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: or a5, a4, a3 -; RV32I-NEXT: or a4, a1, a6 -; RV32I-NEXT: slli a4, a4, 3 -; RV32I-NEXT: addi a3, a4, -32 +; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: slli a4, a1, 3 ; RV32I-NEXT: sll a1, a5, a4 +; RV32I-NEXT: addi a3, a4, -32 ; RV32I-NEXT: bltz a3, .LBB4_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: j .LBB4_3 ; RV32I-NEXT: .LBB4_2: -; RV32I-NEXT: lbu a6, 5(a0) -; RV32I-NEXT: lbu a7, 4(a0) +; RV32I-NEXT: lbu a6, 4(a0) +; RV32I-NEXT: lbu a7, 5(a0) ; RV32I-NEXT: lbu t0, 6(a0) ; RV32I-NEXT: lbu a0, 7(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 ; RV32I-NEXT: srli a5, a5, 1 ; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a0, a0, 24 @@ -496,20 +496,20 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t2, t2, 24 ; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: or a7, t2, t0 +; RV64I-NEXT: lbu t0, 0(a1) ; RV64I-NEXT: lbu t1, 1(a1) -; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: lbu t2, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) ; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or a7, t1, a7 +; RV64I-NEXT: or t0, t1, t0 ; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t2 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: or a4, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a4, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a4, a4, 35 @@ -535,42 +535,41 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: ashr_8bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 5(a0) -; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a3, 4(a0) +; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 6(a0) ; RV32I-NEXT: lbu a6, 7(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t0, 1(a1) -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: lbu a4, 2(a1) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: lbu t0, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, a4 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: or a1, a1, t0 ; RV32I-NEXT: slli a4, a5, 16 ; RV32I-NEXT: slli a5, a6, 24 ; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: or a4, a4, a3 ; RV32I-NEXT: or a3, a1, a7 ; RV32I-NEXT: slli a3, a3, 3 -; RV32I-NEXT: addi a6, a3, -32 ; RV32I-NEXT: sra a1, a4, a3 +; RV32I-NEXT: addi a6, a3, -32 ; RV32I-NEXT: bltz a6, .LBB5_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srai a5, a5, 31 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: srai a1, a5, 31 ; RV32I-NEXT: j .LBB5_3 ; RV32I-NEXT: .LBB5_2: -; RV32I-NEXT: lbu a5, 1(a0) -; RV32I-NEXT: lbu a6, 0(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a6, 1(a0) ; RV32I-NEXT: lbu a7, 2(a0) ; 
RV32I-NEXT: lbu a0, 3(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: slli a4, a4, 1 ; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a0, a0, 24 @@ -633,54 +632,54 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t3, t3, 24 ; RV64I-NEXT: or t1, t2, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: or a7, t3, t0 +; RV64I-NEXT: lbu t0, 0(a1) ; RV64I-NEXT: lbu t2, 1(a1) -; RV64I-NEXT: or t0, t3, t0 ; RV64I-NEXT: lbu t3, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or a7, t2, a7 +; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: slli t3, t3, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t3 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, t1, a5 -; RV64I-NEXT: or a5, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a6, a5, 35 ; RV64I-NEXT: or a5, a4, a3 ; RV64I-NEXT: or a4, a6, a1 -; RV64I-NEXT: addi a3, a4, -64 ; RV64I-NEXT: srl a1, a5, a4 +; RV64I-NEXT: addi a3, a4, -64 ; RV64I-NEXT: bltz a3, .LBB6_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: j .LBB6_3 ; RV64I-NEXT: .LBB6_2: -; RV64I-NEXT: lbu a6, 1(a0) -; RV64I-NEXT: lbu a7, 2(a0) -; RV64I-NEXT: lbu t0, 3(a0) -; RV64I-NEXT: lbu t1, 0(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: lbu t1, 4(a0) -; RV64I-NEXT: lbu t2, 5(a0) -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu t0, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: lbu a6, 0(a0) +; RV64I-NEXT: lbu a7, 1(a0) +; RV64I-NEXT: lbu t0, 2(a0) +; RV64I-NEXT: lbu t1, 3(a0) +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a0, a0, t2 ; RV64I-NEXT: or a6, a7, a6 ; RV64I-NEXT: not a7, a4 ; RV64I-NEXT: slli a5, a5, 1 -; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: srl a0, a0, a4 @@ -787,10 +786,10 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: andi a1, a1, 12 ; RV32I-NEXT: add a1, t2, a1 ; RV32I-NEXT: andi a3, a0, 24 +; RV32I-NEXT: xori a3, a3, 31 ; RV32I-NEXT: lw a4, 0(a1) ; RV32I-NEXT: lw a5, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: xori a3, a3, 31 ; RV32I-NEXT: lw a1, 12(a1) ; RV32I-NEXT: srl a7, a5, a0 ; RV32I-NEXT: slli t0, a6, 1 @@ -872,54 +871,54 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli t3, t3, 24 ; RV64I-NEXT: or t1, t2, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: or a7, t3, t0 +; RV64I-NEXT: lbu t0, 0(a1) ; RV64I-NEXT: lbu t2, 1(a1) -; RV64I-NEXT: or t0, t3, t0 ; RV64I-NEXT: lbu t3, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or a7, t2, a7 +; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: slli t3, t3, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t3 ; RV64I-NEXT: 
or a3, a4, a3 ; RV64I-NEXT: or a4, t1, a5 -; RV64I-NEXT: or a5, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a1, a1, 5 ; RV64I-NEXT: slli a6, a5, 37 ; RV64I-NEXT: or a5, a4, a3 ; RV64I-NEXT: or a4, a6, a1 -; RV64I-NEXT: addi a3, a4, -64 ; RV64I-NEXT: srl a1, a5, a4 +; RV64I-NEXT: addi a3, a4, -64 ; RV64I-NEXT: bltz a3, .LBB7_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: j .LBB7_3 ; RV64I-NEXT: .LBB7_2: -; RV64I-NEXT: lbu a6, 1(a0) -; RV64I-NEXT: lbu a7, 2(a0) -; RV64I-NEXT: lbu t0, 3(a0) -; RV64I-NEXT: lbu t1, 0(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: lbu t1, 4(a0) -; RV64I-NEXT: lbu t2, 5(a0) -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu t0, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: lbu a6, 0(a0) +; RV64I-NEXT: lbu a7, 1(a0) +; RV64I-NEXT: lbu t0, 2(a0) +; RV64I-NEXT: lbu t1, 3(a0) +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a0, a0, t2 ; RV64I-NEXT: or a6, a7, a6 ; RV64I-NEXT: not a7, a4 ; RV64I-NEXT: slli a5, a5, 1 -; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: srl a0, a0, a4 @@ -1016,38 +1015,38 @@ define void @lshr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: sw a4, 4(sp) ; RV32I-NEXT: sw a5, 8(sp) ; RV32I-NEXT: sw a0, 12(sp) -; RV32I-NEXT: lw a0, 8(a1) +; RV32I-NEXT: lw a0, 0(a1) ; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: srli a5, a0, 16 -; RV32I-NEXT: srli a6, a0, 24 -; RV32I-NEXT: srli a7, a0, 8 +; RV32I-NEXT: srli a5, a4, 16 +; RV32I-NEXT: srli a6, a4, 24 +; RV32I-NEXT: srli a7, a4, 8 ; RV32I-NEXT: srli t0, a1, 16 ; RV32I-NEXT: srli t1, a1, 24 ; RV32I-NEXT: srli t2, a1, 8 -; RV32I-NEXT: srli t3, a4, 16 -; RV32I-NEXT: srli t4, a4, 24 -; RV32I-NEXT: srli t5, a4, 8 +; RV32I-NEXT: srli t3, a0, 16 +; RV32I-NEXT: srli t4, a0, 24 +; RV32I-NEXT: srli t5, a0, 8 ; RV32I-NEXT: srli t6, a3, 16 -; RV32I-NEXT: sb a0, 8(a2) +; RV32I-NEXT: sb a4, 8(a2) ; RV32I-NEXT: sb a7, 9(a2) ; RV32I-NEXT: sb a5, 10(a2) ; RV32I-NEXT: sb a6, 11(a2) -; RV32I-NEXT: srli a0, a3, 24 +; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: sb a1, 12(a2) ; RV32I-NEXT: sb t2, 13(a2) ; RV32I-NEXT: sb t0, 14(a2) ; RV32I-NEXT: sb t1, 15(a2) ; RV32I-NEXT: srli a1, a3, 8 -; RV32I-NEXT: sb a4, 0(a2) +; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: sb t5, 1(a2) ; RV32I-NEXT: sb t3, 2(a2) ; RV32I-NEXT: sb t4, 3(a2) ; RV32I-NEXT: sb a3, 4(a2) ; RV32I-NEXT: sb a1, 5(a2) ; RV32I-NEXT: sb t6, 6(a2) -; RV32I-NEXT: sb a0, 7(a2) +; RV32I-NEXT: sb a4, 7(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -1087,54 +1086,54 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t3, t3, 24 ; RV64I-NEXT: or t1, t2, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: or a7, t3, t0 +; RV64I-NEXT: 
lbu t0, 0(a1) ; RV64I-NEXT: lbu t2, 1(a1) -; RV64I-NEXT: or t0, t3, t0 ; RV64I-NEXT: lbu t3, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or a7, t2, a7 +; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: slli t3, t3, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t3 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, t1, a5 -; RV64I-NEXT: or a5, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a6, a5, 35 ; RV64I-NEXT: or a5, a4, a3 ; RV64I-NEXT: or a4, a6, a1 -; RV64I-NEXT: addi a3, a4, -64 ; RV64I-NEXT: sll a1, a5, a4 +; RV64I-NEXT: addi a3, a4, -64 ; RV64I-NEXT: bltz a3, .LBB8_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: j .LBB8_3 ; RV64I-NEXT: .LBB8_2: -; RV64I-NEXT: lbu a6, 9(a0) -; RV64I-NEXT: lbu a7, 10(a0) -; RV64I-NEXT: lbu t0, 11(a0) -; RV64I-NEXT: lbu t1, 8(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: lbu t1, 12(a0) -; RV64I-NEXT: lbu t2, 13(a0) -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu t0, 14(a0) -; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: lbu a6, 8(a0) +; RV64I-NEXT: lbu a7, 9(a0) +; RV64I-NEXT: lbu t0, 10(a0) +; RV64I-NEXT: lbu t1, 11(a0) +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: lbu t0, 12(a0) +; RV64I-NEXT: lbu t1, 13(a0) +; RV64I-NEXT: lbu t2, 14(a0) +; RV64I-NEXT: lbu a0, 15(a0) +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a0, a0, t2 ; RV64I-NEXT: or a6, a7, a6 ; RV64I-NEXT: not a7, a4 ; RV64I-NEXT: srli a5, a5, 1 -; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: sll a0, a0, a4 @@ -1241,11 +1240,11 @@ define void @shl_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: andi a1, a1, 12 ; RV32I-NEXT: sub a1, t2, a1 ; RV32I-NEXT: andi a3, a0, 24 +; RV32I-NEXT: xori a3, a3, 31 ; RV32I-NEXT: lw a4, 0(a1) ; RV32I-NEXT: lw a5, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) ; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: xori a3, a3, 31 ; RV32I-NEXT: sll a7, a5, a0 ; RV32I-NEXT: srli t0, a4, 1 ; RV32I-NEXT: sll a1, a1, a0 @@ -1326,54 +1325,54 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: slli t3, t3, 24 ; RV64I-NEXT: or t1, t2, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: or a7, t3, t0 +; RV64I-NEXT: lbu t0, 0(a1) ; RV64I-NEXT: lbu t2, 1(a1) -; RV64I-NEXT: or t0, t3, t0 ; RV64I-NEXT: lbu t3, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or a7, t2, a7 +; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: slli t3, t3, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t3 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, t1, a5 -; RV64I-NEXT: or a5, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a5, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a1, a1, 5 ; RV64I-NEXT: slli a6, a5, 37 ; RV64I-NEXT: or a5, a4, a3 ; RV64I-NEXT: or a4, a6, a1 -; RV64I-NEXT: addi a3, a4, -64 ; RV64I-NEXT: sll a1, a5, a4 +; RV64I-NEXT: addi a3, a4, -64 ; RV64I-NEXT: bltz a3, .LBB9_2 ; RV64I-NEXT: # %bb.1: 
; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: j .LBB9_3 ; RV64I-NEXT: .LBB9_2: -; RV64I-NEXT: lbu a6, 9(a0) -; RV64I-NEXT: lbu a7, 10(a0) -; RV64I-NEXT: lbu t0, 11(a0) -; RV64I-NEXT: lbu t1, 8(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: lbu t1, 12(a0) -; RV64I-NEXT: lbu t2, 13(a0) -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu t0, 14(a0) -; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: lbu a6, 8(a0) +; RV64I-NEXT: lbu a7, 9(a0) +; RV64I-NEXT: lbu t0, 10(a0) +; RV64I-NEXT: lbu t1, 11(a0) +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: lbu t0, 12(a0) +; RV64I-NEXT: lbu t1, 13(a0) +; RV64I-NEXT: lbu t2, 14(a0) +; RV64I-NEXT: lbu a0, 15(a0) +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a0, a0, t2 ; RV64I-NEXT: or a6, a7, a6 ; RV64I-NEXT: not a7, a4 ; RV64I-NEXT: srli a5, a5, 1 -; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: sll a0, a0, a4 @@ -1470,38 +1469,38 @@ define void @shl_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: sw a4, 20(sp) ; RV32I-NEXT: sw a5, 24(sp) ; RV32I-NEXT: sw a0, 28(sp) -; RV32I-NEXT: lw a0, 8(a1) +; RV32I-NEXT: lw a0, 0(a1) ; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: srli a5, a0, 16 -; RV32I-NEXT: srli a6, a0, 24 -; RV32I-NEXT: srli a7, a0, 8 +; RV32I-NEXT: srli a5, a4, 16 +; RV32I-NEXT: srli a6, a4, 24 +; RV32I-NEXT: srli a7, a4, 8 ; RV32I-NEXT: srli t0, a1, 16 ; RV32I-NEXT: srli t1, a1, 24 ; RV32I-NEXT: srli t2, a1, 8 -; RV32I-NEXT: srli t3, a4, 16 -; RV32I-NEXT: srli t4, a4, 24 -; RV32I-NEXT: srli t5, a4, 8 +; RV32I-NEXT: srli t3, a0, 16 +; RV32I-NEXT: srli t4, a0, 24 +; RV32I-NEXT: srli t5, a0, 8 ; RV32I-NEXT: srli t6, a3, 16 -; RV32I-NEXT: sb a0, 8(a2) +; RV32I-NEXT: sb a4, 8(a2) ; RV32I-NEXT: sb a7, 9(a2) ; RV32I-NEXT: sb a5, 10(a2) ; RV32I-NEXT: sb a6, 11(a2) -; RV32I-NEXT: srli a0, a3, 24 +; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: sb a1, 12(a2) ; RV32I-NEXT: sb t2, 13(a2) ; RV32I-NEXT: sb t0, 14(a2) ; RV32I-NEXT: sb t1, 15(a2) ; RV32I-NEXT: srli a1, a3, 8 -; RV32I-NEXT: sb a4, 0(a2) +; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: sb t5, 1(a2) ; RV32I-NEXT: sb t3, 2(a2) ; RV32I-NEXT: sb t4, 3(a2) ; RV32I-NEXT: sb a3, 4(a2) ; RV32I-NEXT: sb a1, 5(a2) ; RV32I-NEXT: sb t6, 6(a2) -; RV32I-NEXT: sb a0, 7(a2) +; RV32I-NEXT: sb a4, 7(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -1542,56 +1541,55 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t3, t3, 24 ; RV64I-NEXT: or t1, t2, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: or a7, t3, t0 +; RV64I-NEXT: lbu t0, 0(a1) ; RV64I-NEXT: lbu t2, 1(a1) -; RV64I-NEXT: or t0, t3, t0 ; RV64I-NEXT: lbu t3, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or a7, t2, a7 +; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: slli t3, t3, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t3 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a5, t1, a5 -; RV64I-NEXT: or a4, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or 
a4, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a6, a5, 32 ; RV64I-NEXT: slli a1, a1, 3 ; RV64I-NEXT: slli a7, a4, 35 ; RV64I-NEXT: or a4, a6, a3 ; RV64I-NEXT: or a3, a7, a1 -; RV64I-NEXT: addi a6, a3, -64 ; RV64I-NEXT: sra a1, a4, a3 +; RV64I-NEXT: addi a6, a3, -64 ; RV64I-NEXT: bltz a6, .LBB10_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sraiw a3, a5, 31 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: sraiw a1, a5, 31 ; RV64I-NEXT: j .LBB10_3 ; RV64I-NEXT: .LBB10_2: -; RV64I-NEXT: lbu a5, 1(a0) -; RV64I-NEXT: lbu a6, 2(a0) -; RV64I-NEXT: lbu a7, 3(a0) -; RV64I-NEXT: lbu t0, 0(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a5, t0 -; RV64I-NEXT: lbu t0, 4(a0) -; RV64I-NEXT: lbu t1, 5(a0) -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: lbu a5, 0(a0) +; RV64I-NEXT: lbu a6, 1(a0) +; RV64I-NEXT: lbu a7, 2(a0) +; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: lbu a7, 4(a0) +; RV64I-NEXT: lbu t0, 5(a0) +; RV64I-NEXT: lbu t1, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: not a6, a3 ; RV64I-NEXT: slli a4, a4, 1 -; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a0, a0, a7 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: srl a0, a0, a3 @@ -1665,17 +1663,17 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli t1, t1, 8 ; RV32I-NEXT: or a4, t3, a4 ; RV32I-NEXT: or t3, t5, t4 -; RV32I-NEXT: lbu t4, 0(a1) -; RV32I-NEXT: lbu t5, 1(a1) ; RV32I-NEXT: or t0, t1, t0 -; RV32I-NEXT: lbu t1, 2(a1) +; RV32I-NEXT: lbu t1, 0(a1) +; RV32I-NEXT: lbu t4, 1(a1) +; RV32I-NEXT: lbu t5, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t5, t5, 8 -; RV32I-NEXT: or t4, t5, t4 -; RV32I-NEXT: slli t1, t1, 16 +; RV32I-NEXT: slli t4, t4, 8 +; RV32I-NEXT: or t1, t4, t1 +; RV32I-NEXT: slli t5, t5, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t1 -; RV32I-NEXT: mv t1, sp +; RV32I-NEXT: or a1, a1, t5 +; RV32I-NEXT: mv t4, sp ; RV32I-NEXT: slli t2, t2, 16 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or t2, a0, t2 @@ -1684,7 +1682,7 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: or a5, a7, a6 ; RV32I-NEXT: or a4, t3, a4 ; RV32I-NEXT: or a6, t2, t0 -; RV32I-NEXT: or a1, a1, t4 +; RV32I-NEXT: or a1, a1, t1 ; RV32I-NEXT: sw a0, 16(sp) ; RV32I-NEXT: sw a0, 20(sp) ; RV32I-NEXT: sw a0, 24(sp) @@ -1695,12 +1693,12 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: sw a6, 12(sp) ; RV32I-NEXT: slli a0, a1, 3 ; RV32I-NEXT: andi a1, a1, 12 -; RV32I-NEXT: add a1, t1, a1 +; RV32I-NEXT: add a1, t4, a1 ; RV32I-NEXT: andi a3, a0, 24 +; RV32I-NEXT: xori a3, a3, 31 ; RV32I-NEXT: lw a4, 0(a1) ; RV32I-NEXT: lw a5, 4(a1) ; RV32I-NEXT: lw a6, 8(a1) -; RV32I-NEXT: xori a3, a3, 31 ; RV32I-NEXT: lw a1, 12(a1) ; RV32I-NEXT: srl a7, a5, a0 ; RV32I-NEXT: slli t0, a6, 1 @@ -1782,56 +1780,55 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli t3, t3, 24 ; RV64I-NEXT: or t1, t2, t1 ; 
RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 0(a1) +; RV64I-NEXT: or a7, t3, t0 +; RV64I-NEXT: lbu t0, 0(a1) ; RV64I-NEXT: lbu t2, 1(a1) -; RV64I-NEXT: or t0, t3, t0 ; RV64I-NEXT: lbu t3, 2(a1) ; RV64I-NEXT: lbu a1, 3(a1) ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or a7, t2, a7 +; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: slli t3, t3, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t3 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a5, t1, a5 -; RV64I-NEXT: or a4, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a4, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a6, a5, 32 ; RV64I-NEXT: slli a1, a1, 5 ; RV64I-NEXT: slli a7, a4, 37 ; RV64I-NEXT: or a4, a6, a3 ; RV64I-NEXT: or a3, a7, a1 -; RV64I-NEXT: addi a6, a3, -64 ; RV64I-NEXT: sra a1, a4, a3 +; RV64I-NEXT: addi a6, a3, -64 ; RV64I-NEXT: bltz a6, .LBB11_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sraiw a3, a5, 31 ; RV64I-NEXT: mv a0, a1 -; RV64I-NEXT: mv a1, a3 +; RV64I-NEXT: sraiw a1, a5, 31 ; RV64I-NEXT: j .LBB11_3 ; RV64I-NEXT: .LBB11_2: -; RV64I-NEXT: lbu a5, 1(a0) -; RV64I-NEXT: lbu a6, 2(a0) -; RV64I-NEXT: lbu a7, 3(a0) -; RV64I-NEXT: lbu t0, 0(a0) -; RV64I-NEXT: slli a5, a5, 8 -; RV64I-NEXT: slli a6, a6, 16 -; RV64I-NEXT: slli a7, a7, 24 -; RV64I-NEXT: or a5, a5, t0 -; RV64I-NEXT: lbu t0, 4(a0) -; RV64I-NEXT: lbu t1, 5(a0) -; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: lbu a5, 0(a0) +; RV64I-NEXT: lbu a6, 1(a0) +; RV64I-NEXT: lbu a7, 2(a0) +; RV64I-NEXT: lbu t0, 3(a0) +; RV64I-NEXT: slli a6, a6, 8 ; RV64I-NEXT: slli a7, a7, 16 +; RV64I-NEXT: slli t0, t0, 24 +; RV64I-NEXT: or a5, a6, a5 +; RV64I-NEXT: or a6, t0, a7 +; RV64I-NEXT: lbu a7, 4(a0) +; RV64I-NEXT: lbu t0, 5(a0) +; RV64I-NEXT: lbu t1, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) +; RV64I-NEXT: slli t0, t0, 8 +; RV64I-NEXT: or a7, t0, a7 +; RV64I-NEXT: slli t1, t1, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, a7 +; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a5, a6, a5 ; RV64I-NEXT: not a6, a3 ; RV64I-NEXT: slli a4, a4, 1 -; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a0, a0, a7 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a5 ; RV64I-NEXT: srl a0, a0, a3 @@ -1927,38 +1924,38 @@ define void @ashr_16bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: sw a4, 4(sp) ; RV32I-NEXT: sw a5, 8(sp) ; RV32I-NEXT: sw a6, 12(sp) -; RV32I-NEXT: lw a0, 8(a1) +; RV32I-NEXT: lw a0, 0(a1) ; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a4, 0(a1) +; RV32I-NEXT: lw a4, 8(a1) ; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: srli a5, a0, 16 -; RV32I-NEXT: srli a6, a0, 24 -; RV32I-NEXT: srli a7, a0, 8 +; RV32I-NEXT: srli a5, a4, 16 +; RV32I-NEXT: srli a6, a4, 24 +; RV32I-NEXT: srli a7, a4, 8 ; RV32I-NEXT: srli t0, a1, 16 ; RV32I-NEXT: srli t1, a1, 24 ; RV32I-NEXT: srli t2, a1, 8 -; RV32I-NEXT: srli t3, a4, 16 -; RV32I-NEXT: srli t4, a4, 24 -; RV32I-NEXT: srli t5, a4, 8 +; RV32I-NEXT: srli t3, a0, 16 +; RV32I-NEXT: srli t4, a0, 24 +; RV32I-NEXT: srli t5, a0, 8 ; RV32I-NEXT: srli t6, a3, 16 -; RV32I-NEXT: sb a0, 8(a2) +; RV32I-NEXT: sb a4, 8(a2) ; RV32I-NEXT: sb a7, 9(a2) ; RV32I-NEXT: sb a5, 10(a2) ; RV32I-NEXT: sb a6, 11(a2) -; RV32I-NEXT: srli a0, a3, 24 +; RV32I-NEXT: srli a4, a3, 24 ; RV32I-NEXT: sb a1, 12(a2) ; RV32I-NEXT: sb t2, 13(a2) ; RV32I-NEXT: sb t0, 14(a2) ; RV32I-NEXT: sb t1, 15(a2) ; RV32I-NEXT: srli a1, a3, 8 -; RV32I-NEXT: sb a4, 0(a2) +; RV32I-NEXT: sb a0, 0(a2) ; RV32I-NEXT: sb t5, 1(a2) ; RV32I-NEXT: sb t3, 2(a2) ; 
RV32I-NEXT: sb t4, 3(a2) ; RV32I-NEXT: sb a3, 4(a2) ; RV32I-NEXT: sb a1, 5(a2) ; RV32I-NEXT: sb t6, 6(a2) -; RV32I-NEXT: sb a0, 7(a2) +; RV32I-NEXT: sb a4, 7(a2) ; RV32I-NEXT: addi sp, sp, 32 ; RV32I-NEXT: ret %src = load i128, ptr %src.ptr, align 1 @@ -2065,13 +2062,13 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli s7, s7, 24 ; RV64I-NEXT: or s5, s6, s5 ; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: lbu s3, 4(a1) +; RV64I-NEXT: or s3, s7, s4 +; RV64I-NEXT: lbu s4, 4(a1) ; RV64I-NEXT: lbu s6, 5(a1) -; RV64I-NEXT: or s4, s7, s4 ; RV64I-NEXT: lbu s7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli s6, s6, 8 -; RV64I-NEXT: or s3, s6, s3 +; RV64I-NEXT: or s4, s6, s4 ; RV64I-NEXT: sd zero, 32(sp) ; RV64I-NEXT: sd zero, 40(sp) ; RV64I-NEXT: sd zero, 48(sp) @@ -2088,8 +2085,8 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a0, a0, t5 ; RV64I-NEXT: or t0, s0, t6 ; RV64I-NEXT: or t1, s5, s1 -; RV64I-NEXT: or t2, s4, s2 -; RV64I-NEXT: or a1, a1, s3 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: or a1, a1, s4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a6, a6, 32 ; RV64I-NEXT: slli a0, a0, 32 @@ -2108,21 +2105,21 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: andi a1, a1, 24 ; RV64I-NEXT: add a1, s6, a1 ; RV64I-NEXT: andi a0, a4, 56 +; RV64I-NEXT: xori a5, a0, 63 ; RV64I-NEXT: ld a3, 0(a1) -; RV64I-NEXT: ld a5, 8(a1) -; RV64I-NEXT: ld a6, 16(a1) -; RV64I-NEXT: xori a7, a0, 63 +; RV64I-NEXT: ld a6, 8(a1) +; RV64I-NEXT: ld a7, 16(a1) ; RV64I-NEXT: ld t0, 24(a1) -; RV64I-NEXT: srl a0, a5, a4 -; RV64I-NEXT: slli t1, a6, 1 +; RV64I-NEXT: srl a0, a6, a4 +; RV64I-NEXT: slli t1, a7, 1 ; RV64I-NEXT: srl a1, a3, a4 -; RV64I-NEXT: slli a5, a5, 1 -; RV64I-NEXT: srl a3, a6, a4 -; RV64I-NEXT: slli a6, t0, 1 +; RV64I-NEXT: slli a6, a6, 1 +; RV64I-NEXT: srl a3, a7, a4 +; RV64I-NEXT: slli a7, t0, 1 ; RV64I-NEXT: srl t0, t0, a4 -; RV64I-NEXT: sll a4, t1, a7 -; RV64I-NEXT: sll a5, a5, a7 -; RV64I-NEXT: sll a6, a6, a7 +; RV64I-NEXT: sll a4, t1, a5 +; RV64I-NEXT: sll a6, a6, a5 +; RV64I-NEXT: sll a5, a7, a5 ; RV64I-NEXT: srli a7, t0, 56 ; RV64I-NEXT: srli t1, t0, 48 ; RV64I-NEXT: srli t2, t0, 40 @@ -2131,8 +2128,8 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: srli t5, t0, 16 ; RV64I-NEXT: srli t6, t0, 8 ; RV64I-NEXT: or a4, a0, a4 -; RV64I-NEXT: or a5, a1, a5 -; RV64I-NEXT: or a6, a3, a6 +; RV64I-NEXT: or a6, a1, a6 +; RV64I-NEXT: or a5, a3, a5 ; RV64I-NEXT: sb t3, 28(a2) ; RV64I-NEXT: sb t2, 29(a2) ; RV64I-NEXT: sb t1, 30(a2) @@ -2141,20 +2138,20 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb t6, 25(a2) ; RV64I-NEXT: sb t5, 26(a2) ; RV64I-NEXT: sb t4, 27(a2) -; RV64I-NEXT: srli a7, a6, 56 -; RV64I-NEXT: srli t0, a6, 48 -; RV64I-NEXT: srli t1, a6, 40 -; RV64I-NEXT: srli t2, a6, 32 -; RV64I-NEXT: srli t3, a6, 24 -; RV64I-NEXT: srli t4, a6, 16 -; RV64I-NEXT: srli a6, a6, 8 -; RV64I-NEXT: srli t5, a5, 56 -; RV64I-NEXT: srli t6, a5, 48 -; RV64I-NEXT: srli s0, a5, 40 -; RV64I-NEXT: srli s1, a5, 32 -; RV64I-NEXT: srli s2, a5, 24 -; RV64I-NEXT: srli s3, a5, 16 +; RV64I-NEXT: srli a7, a5, 56 +; RV64I-NEXT: srli t0, a5, 48 +; RV64I-NEXT: srli t1, a5, 40 +; RV64I-NEXT: srli t2, a5, 32 +; RV64I-NEXT: srli t3, a5, 24 +; RV64I-NEXT: srli t4, a5, 16 ; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli t5, a6, 56 +; RV64I-NEXT: srli t6, a6, 48 +; RV64I-NEXT: srli s0, a6, 40 +; 
RV64I-NEXT: srli s1, a6, 32 +; RV64I-NEXT: srli s2, a6, 24 +; RV64I-NEXT: srli s3, a6, 16 +; RV64I-NEXT: srli a6, a6, 8 ; RV64I-NEXT: srli s4, a4, 56 ; RV64I-NEXT: srli s5, a4, 48 ; RV64I-NEXT: srli s6, a4, 40 @@ -2164,7 +2161,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb a7, 23(a2) ; RV64I-NEXT: srli a7, a4, 32 ; RV64I-NEXT: sb a3, 16(a2) -; RV64I-NEXT: sb a6, 17(a2) +; RV64I-NEXT: sb a5, 17(a2) ; RV64I-NEXT: sb t4, 18(a2) ; RV64I-NEXT: sb t3, 19(a2) ; RV64I-NEXT: srli a3, a4, 24 @@ -2172,10 +2169,10 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb s0, 5(a2) ; RV64I-NEXT: sb t6, 6(a2) ; RV64I-NEXT: sb t5, 7(a2) -; RV64I-NEXT: srli a6, a4, 16 +; RV64I-NEXT: srli a5, a4, 16 ; RV64I-NEXT: srli a4, a4, 8 ; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: sb a5, 1(a2) +; RV64I-NEXT: sb a6, 1(a2) ; RV64I-NEXT: sb s3, 2(a2) ; RV64I-NEXT: sb s2, 3(a2) ; RV64I-NEXT: sb a7, 12(a2) @@ -2184,7 +2181,7 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb s4, 15(a2) ; RV64I-NEXT: sb a0, 8(a2) ; RV64I-NEXT: sb a4, 9(a2) -; RV64I-NEXT: sb a6, 10(a2) +; RV64I-NEXT: sb a5, 10(a2) ; RV64I-NEXT: sb a3, 11(a2) ; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload @@ -2543,13 +2540,13 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli s7, s7, 24 ; RV64I-NEXT: or s5, s6, s5 ; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: lbu s3, 4(a1) +; RV64I-NEXT: or s3, s7, s4 +; RV64I-NEXT: lbu s4, 4(a1) ; RV64I-NEXT: lbu s6, 5(a1) -; RV64I-NEXT: or s4, s7, s4 ; RV64I-NEXT: lbu s7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli s6, s6, 8 -; RV64I-NEXT: or s3, s6, s3 +; RV64I-NEXT: or s4, s6, s4 ; RV64I-NEXT: sd zero, 32(sp) ; RV64I-NEXT: sd zero, 40(sp) ; RV64I-NEXT: sd zero, 48(sp) @@ -2566,8 +2563,8 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: or a0, a0, t5 ; RV64I-NEXT: or t0, s0, t6 ; RV64I-NEXT: or t1, s5, s1 -; RV64I-NEXT: or t2, s4, s2 -; RV64I-NEXT: or a1, a1, s3 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: or a1, a1, s4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a6, a6, 32 ; RV64I-NEXT: slli a0, a0, 32 @@ -2587,24 +2584,24 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: andi a1, a1, 24 ; RV64I-NEXT: andi a0, a3, 32 ; RV64I-NEXT: add a1, s6, a1 -; RV64I-NEXT: ld a4, 0(a1) -; RV64I-NEXT: ld a5, 8(a1) -; RV64I-NEXT: ld a6, 16(a1) -; RV64I-NEXT: xori a7, a0, 63 +; RV64I-NEXT: xori a4, a0, 63 +; RV64I-NEXT: ld a5, 0(a1) +; RV64I-NEXT: ld a6, 8(a1) +; RV64I-NEXT: ld a7, 16(a1) ; RV64I-NEXT: ld t0, 24(a1) -; RV64I-NEXT: srl a0, a5, a3 -; RV64I-NEXT: slli t1, a6, 1 -; RV64I-NEXT: srl a1, a4, a3 -; RV64I-NEXT: slli a5, a5, 1 -; RV64I-NEXT: srl a4, a6, a3 -; RV64I-NEXT: slli a6, t0, 1 +; RV64I-NEXT: srl a0, a6, a3 +; RV64I-NEXT: slli t1, a7, 1 +; RV64I-NEXT: srl a1, a5, a3 +; RV64I-NEXT: slli a6, a6, 1 +; RV64I-NEXT: srl a5, a7, a3 +; RV64I-NEXT: slli a7, t0, 1 ; RV64I-NEXT: srl a3, t0, a3 -; RV64I-NEXT: sll t0, t1, a7 -; RV64I-NEXT: sll a5, a5, a7 -; RV64I-NEXT: sll a6, a6, a7 -; RV64I-NEXT: srli a7, a4, 24 -; RV64I-NEXT: srli t1, a4, 16 -; RV64I-NEXT: srli t2, a4, 8 +; RV64I-NEXT: sll t0, t1, a4 +; RV64I-NEXT: sll a6, a6, a4 +; RV64I-NEXT: sll a4, a7, a4 +; RV64I-NEXT: srli a7, a5, 24 +; RV64I-NEXT: srli t1, a5, 16 +; RV64I-NEXT: srli t2, a5, 8 ; RV64I-NEXT: srli t3, a3, 56 ; 
RV64I-NEXT: srli t4, a3, 48 ; RV64I-NEXT: srli t5, a3, 40 @@ -2616,19 +2613,19 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: srli s4, a1, 16 ; RV64I-NEXT: srli s5, a1, 8 ; RV64I-NEXT: srli s6, a0, 24 -; RV64I-NEXT: or a6, a4, a6 -; RV64I-NEXT: sb a4, 16(a2) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: sb a5, 16(a2) ; RV64I-NEXT: sb t2, 17(a2) ; RV64I-NEXT: sb t1, 18(a2) ; RV64I-NEXT: sb a7, 19(a2) -; RV64I-NEXT: srli a4, a0, 16 +; RV64I-NEXT: srli a5, a0, 16 ; RV64I-NEXT: sb t6, 28(a2) ; RV64I-NEXT: sb t5, 29(a2) ; RV64I-NEXT: sb t4, 30(a2) ; RV64I-NEXT: sb t3, 31(a2) ; RV64I-NEXT: srli a7, a0, 8 ; RV64I-NEXT: or t0, a0, t0 -; RV64I-NEXT: or a5, a1, a5 +; RV64I-NEXT: or a6, a1, a6 ; RV64I-NEXT: sb a3, 24(a2) ; RV64I-NEXT: sb s2, 25(a2) ; RV64I-NEXT: sb s1, 26(a2) @@ -2639,16 +2636,16 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: sb s3, 3(a2) ; RV64I-NEXT: sb a0, 8(a2) ; RV64I-NEXT: sb a7, 9(a2) -; RV64I-NEXT: sb a4, 10(a2) +; RV64I-NEXT: sb a5, 10(a2) ; RV64I-NEXT: sb s6, 11(a2) -; RV64I-NEXT: srli a0, a6, 56 -; RV64I-NEXT: srli a1, a6, 48 -; RV64I-NEXT: srli a3, a6, 40 -; RV64I-NEXT: srli a4, a6, 32 -; RV64I-NEXT: srli a6, a5, 56 -; RV64I-NEXT: srli a7, a5, 48 -; RV64I-NEXT: srli t1, a5, 40 -; RV64I-NEXT: srli a5, a5, 32 +; RV64I-NEXT: srli a0, a4, 56 +; RV64I-NEXT: srli a1, a4, 48 +; RV64I-NEXT: srli a3, a4, 40 +; RV64I-NEXT: srli a4, a4, 32 +; RV64I-NEXT: srli a5, a6, 56 +; RV64I-NEXT: srli a7, a6, 48 +; RV64I-NEXT: srli t1, a6, 40 +; RV64I-NEXT: srli a6, a6, 32 ; RV64I-NEXT: srli t2, t0, 56 ; RV64I-NEXT: srli t3, t0, 48 ; RV64I-NEXT: srli t4, t0, 40 @@ -2657,10 +2654,10 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: sb a3, 21(a2) ; RV64I-NEXT: sb a1, 22(a2) ; RV64I-NEXT: sb a0, 23(a2) -; RV64I-NEXT: sb a5, 4(a2) +; RV64I-NEXT: sb a6, 4(a2) ; RV64I-NEXT: sb t1, 5(a2) ; RV64I-NEXT: sb a7, 6(a2) -; RV64I-NEXT: sb a6, 7(a2) +; RV64I-NEXT: sb a5, 7(a2) ; RV64I-NEXT: sb t0, 12(a2) ; RV64I-NEXT: sb t4, 13(a2) ; RV64I-NEXT: sb t3, 14(a2) @@ -2797,13 +2794,13 @@ define void @lshr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: sw t0, 12(sp) ; RV32I-NEXT: sw t1, 16(sp) ; RV32I-NEXT: sw a5, 20(sp) -; RV32I-NEXT: lw a6, 16(t6) -; RV32I-NEXT: lw a5, 20(t6) -; RV32I-NEXT: lw a7, 24(t6) ; RV32I-NEXT: lw a1, 0(t6) ; RV32I-NEXT: lw a0, 4(t6) ; RV32I-NEXT: lw a4, 8(t6) ; RV32I-NEXT: lw a3, 12(t6) +; RV32I-NEXT: lw a6, 16(t6) +; RV32I-NEXT: lw a5, 20(t6) +; RV32I-NEXT: lw a7, 24(t6) ; RV32I-NEXT: lw t0, 28(t6) ; RV32I-NEXT: srli t1, a7, 24 ; RV32I-NEXT: srli t2, a7, 16 @@ -3001,9 +2998,9 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV64I-NEXT: sd a5, 8(sp) ; RV64I-NEXT: sd a3, 16(sp) ; RV64I-NEXT: sd a0, 24(sp) -; RV64I-NEXT: ld a4, 16(t3) -; RV64I-NEXT: ld a0, 8(t3) ; RV64I-NEXT: ld a1, 0(t3) +; RV64I-NEXT: ld a0, 8(t3) +; RV64I-NEXT: ld a4, 16(t3) ; RV64I-NEXT: ld a3, 24(t3) ; RV64I-NEXT: srli a5, a4, 56 ; RV64I-NEXT: srli a6, a4, 48 @@ -3197,13 +3194,13 @@ define void @lshr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: sw t0, 12(sp) ; RV32I-NEXT: sw t1, 16(sp) ; RV32I-NEXT: sw a5, 20(sp) -; RV32I-NEXT: lw a6, 16(t6) -; RV32I-NEXT: lw a5, 20(t6) -; RV32I-NEXT: lw a7, 24(t6) ; RV32I-NEXT: lw a1, 0(t6) ; RV32I-NEXT: lw a0, 4(t6) ; RV32I-NEXT: lw a4, 8(t6) ; RV32I-NEXT: lw a3, 12(t6) +; RV32I-NEXT: lw a6, 16(t6) +; RV32I-NEXT: lw a5, 20(t6) +; RV32I-NEXT: 
lw a7, 24(t6) ; RV32I-NEXT: lw t0, 28(t6) ; RV32I-NEXT: srli t1, a7, 24 ; RV32I-NEXT: srli t2, a7, 16 @@ -3380,13 +3377,13 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli s7, s7, 24 ; RV64I-NEXT: or s5, s6, s5 ; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: lbu s3, 4(a1) +; RV64I-NEXT: or s3, s7, s4 +; RV64I-NEXT: lbu s4, 4(a1) ; RV64I-NEXT: lbu s6, 5(a1) -; RV64I-NEXT: or s4, s7, s4 ; RV64I-NEXT: lbu s7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli s6, s6, 8 -; RV64I-NEXT: or s3, s6, s3 +; RV64I-NEXT: or s4, s6, s4 ; RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: sd zero, 16(sp) @@ -3403,8 +3400,8 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a0, a0, t5 ; RV64I-NEXT: or t0, s0, t6 ; RV64I-NEXT: or t1, s5, s1 -; RV64I-NEXT: or t2, s4, s2 -; RV64I-NEXT: or a1, a1, s3 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: or a1, a1, s4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a6, a6, 32 ; RV64I-NEXT: slli a0, a0, 32 @@ -3423,11 +3420,11 @@ define void @shl_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: andi a1, a1, 24 ; RV64I-NEXT: sub a1, s6, a1 ; RV64I-NEXT: andi a3, a0, 56 +; RV64I-NEXT: xori a3, a3, 63 ; RV64I-NEXT: ld a4, 0(a1) ; RV64I-NEXT: ld a5, 8(a1) ; RV64I-NEXT: ld a6, 16(a1) ; RV64I-NEXT: ld a1, 24(a1) -; RV64I-NEXT: xori a3, a3, 63 ; RV64I-NEXT: sll a7, a5, a0 ; RV64I-NEXT: srli t0, a4, 1 ; RV64I-NEXT: sll t1, a1, a0 @@ -3858,13 +3855,13 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: slli s7, s7, 24 ; RV64I-NEXT: or s5, s6, s5 ; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: lbu s3, 4(a1) +; RV64I-NEXT: or s3, s7, s4 +; RV64I-NEXT: lbu s4, 4(a1) ; RV64I-NEXT: lbu s6, 5(a1) -; RV64I-NEXT: or s4, s7, s4 ; RV64I-NEXT: lbu s7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli s6, s6, 8 -; RV64I-NEXT: or s3, s6, s3 +; RV64I-NEXT: or s4, s6, s4 ; RV64I-NEXT: sd zero, 0(sp) ; RV64I-NEXT: sd zero, 8(sp) ; RV64I-NEXT: sd zero, 16(sp) @@ -3881,8 +3878,8 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: or a0, a0, t5 ; RV64I-NEXT: or t0, s0, t6 ; RV64I-NEXT: or t1, s5, s1 -; RV64I-NEXT: or t2, s4, s2 -; RV64I-NEXT: or a1, a1, s3 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: or a1, a1, s4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a6, a6, 32 ; RV64I-NEXT: slli a0, a0, 32 @@ -3902,25 +3899,25 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: andi a1, a1, 24 ; RV64I-NEXT: andi a0, a3, 32 ; RV64I-NEXT: sub a1, s6, a1 -; RV64I-NEXT: ld a4, 0(a1) -; RV64I-NEXT: ld a5, 8(a1) -; RV64I-NEXT: ld a6, 16(a1) +; RV64I-NEXT: xori a4, a0, 63 +; RV64I-NEXT: ld a5, 0(a1) +; RV64I-NEXT: ld a6, 8(a1) +; RV64I-NEXT: ld a7, 16(a1) ; RV64I-NEXT: ld a1, 24(a1) -; RV64I-NEXT: xori a7, a0, 63 -; RV64I-NEXT: sll a0, a5, a3 -; RV64I-NEXT: srli t0, a4, 1 +; RV64I-NEXT: sll a0, a6, a3 +; RV64I-NEXT: srli t0, a5, 1 ; RV64I-NEXT: sll a1, a1, a3 -; RV64I-NEXT: srli t1, a6, 1 -; RV64I-NEXT: sll a6, a6, a3 -; RV64I-NEXT: srli a5, a5, 1 -; RV64I-NEXT: sll a3, a4, a3 -; RV64I-NEXT: srl a4, t0, a7 -; RV64I-NEXT: srl t0, t1, a7 -; RV64I-NEXT: srl a5, a5, a7 -; RV64I-NEXT: srli a7, a6, 56 -; RV64I-NEXT: srli t1, a6, 48 -; RV64I-NEXT: srli t2, a6, 40 -; RV64I-NEXT: srli t3, a6, 32 +; RV64I-NEXT: srli t1, a7, 1 +; RV64I-NEXT: sll a7, a7, a3 +; RV64I-NEXT: srli a6, a6, 1 +; RV64I-NEXT: sll a3, a5, a3 +; RV64I-NEXT: srl a5, t0, a4 +; 
RV64I-NEXT: srl t0, t1, a4 +; RV64I-NEXT: srl a4, a6, a4 +; RV64I-NEXT: srli a6, a7, 56 +; RV64I-NEXT: srli t1, a7, 48 +; RV64I-NEXT: srli t2, a7, 40 +; RV64I-NEXT: srli t3, a7, 32 ; RV64I-NEXT: srli t4, a1, 56 ; RV64I-NEXT: srli t5, a1, 48 ; RV64I-NEXT: srli t6, a1, 40 @@ -3933,19 +3930,19 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: srli s6, a3, 16 ; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: srli t0, a3, 8 -; RV64I-NEXT: or a5, a6, a5 -; RV64I-NEXT: srli a6, a0, 56 +; RV64I-NEXT: or a4, a7, a4 +; RV64I-NEXT: srli a7, a0, 56 ; RV64I-NEXT: sb t3, 20(a2) ; RV64I-NEXT: sb t2, 21(a2) ; RV64I-NEXT: sb t1, 22(a2) -; RV64I-NEXT: sb a7, 23(a2) -; RV64I-NEXT: srli a7, a0, 48 +; RV64I-NEXT: sb a6, 23(a2) +; RV64I-NEXT: srli a6, a0, 48 ; RV64I-NEXT: sb s0, 28(a2) ; RV64I-NEXT: sb t6, 29(a2) ; RV64I-NEXT: sb t5, 30(a2) ; RV64I-NEXT: sb t4, 31(a2) ; RV64I-NEXT: srli t1, a0, 40 -; RV64I-NEXT: or a4, a0, a4 +; RV64I-NEXT: or a5, a0, a5 ; RV64I-NEXT: srli a0, a0, 32 ; RV64I-NEXT: sb s4, 4(a2) ; RV64I-NEXT: sb s3, 5(a2) @@ -3957,18 +3954,18 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: sb s5, 3(a2) ; RV64I-NEXT: sb a0, 12(a2) ; RV64I-NEXT: sb t1, 13(a2) -; RV64I-NEXT: sb a7, 14(a2) -; RV64I-NEXT: sb a6, 15(a2) -; RV64I-NEXT: srli a0, a5, 24 -; RV64I-NEXT: srli a3, a5, 16 -; RV64I-NEXT: srli a6, a5, 8 +; RV64I-NEXT: sb a6, 14(a2) +; RV64I-NEXT: sb a7, 15(a2) +; RV64I-NEXT: srli a0, a4, 24 +; RV64I-NEXT: srli a3, a4, 16 +; RV64I-NEXT: srli a6, a4, 8 ; RV64I-NEXT: srli a7, a1, 24 ; RV64I-NEXT: srli t0, a1, 16 ; RV64I-NEXT: srli t1, a1, 8 -; RV64I-NEXT: srli t2, a4, 24 -; RV64I-NEXT: srli t3, a4, 16 -; RV64I-NEXT: srli t4, a4, 8 -; RV64I-NEXT: sb a5, 16(a2) +; RV64I-NEXT: srli t2, a5, 24 +; RV64I-NEXT: srli t3, a5, 16 +; RV64I-NEXT: srli t4, a5, 8 +; RV64I-NEXT: sb a4, 16(a2) ; RV64I-NEXT: sb a6, 17(a2) ; RV64I-NEXT: sb a3, 18(a2) ; RV64I-NEXT: sb a0, 19(a2) @@ -3976,7 +3973,7 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV64I-NEXT: sb t1, 25(a2) ; RV64I-NEXT: sb t0, 26(a2) ; RV64I-NEXT: sb a7, 27(a2) -; RV64I-NEXT: sb a4, 8(a2) +; RV64I-NEXT: sb a5, 8(a2) ; RV64I-NEXT: sb t4, 9(a2) ; RV64I-NEXT: sb t3, 10(a2) ; RV64I-NEXT: sb t2, 11(a2) @@ -4112,13 +4109,13 @@ define void @shl_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) nounw ; RV32I-NEXT: sw t0, 44(sp) ; RV32I-NEXT: sw t1, 48(sp) ; RV32I-NEXT: sw a5, 52(sp) -; RV32I-NEXT: lw a6, 16(t2) -; RV32I-NEXT: lw a5, 20(t2) -; RV32I-NEXT: lw a7, 24(t2) ; RV32I-NEXT: lw a1, 0(t2) ; RV32I-NEXT: lw a0, 4(t2) ; RV32I-NEXT: lw a4, 8(t2) ; RV32I-NEXT: lw a3, 12(t2) +; RV32I-NEXT: lw a6, 16(t2) +; RV32I-NEXT: lw a5, 20(t2) +; RV32I-NEXT: lw a7, 24(t2) ; RV32I-NEXT: lw t0, 28(t2) ; RV32I-NEXT: srli t1, a7, 24 ; RV32I-NEXT: srli t2, a7, 16 @@ -4316,9 +4313,9 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV64I-NEXT: sd a5, 40(sp) ; RV64I-NEXT: sd a3, 48(sp) ; RV64I-NEXT: sd a0, 56(sp) -; RV64I-NEXT: ld a4, 16(t2) -; RV64I-NEXT: ld a0, 8(t2) ; RV64I-NEXT: ld a1, 0(t2) +; RV64I-NEXT: ld a0, 8(t2) +; RV64I-NEXT: ld a4, 16(t2) ; RV64I-NEXT: ld a3, 24(t2) ; RV64I-NEXT: srli a5, a4, 56 ; RV64I-NEXT: srli a6, a4, 48 @@ -4512,13 +4509,13 @@ define void @shl_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) nou ; RV32I-NEXT: sw t0, 44(sp) ; RV32I-NEXT: sw t1, 48(sp) ; RV32I-NEXT: sw a5, 52(sp) -; RV32I-NEXT: lw a6, 16(t2) -; RV32I-NEXT: lw a5, 20(t2) -; RV32I-NEXT: lw a7, 24(t2) ; 
RV32I-NEXT: lw a1, 0(t2) ; RV32I-NEXT: lw a0, 4(t2) ; RV32I-NEXT: lw a4, 8(t2) ; RV32I-NEXT: lw a3, 12(t2) +; RV32I-NEXT: lw a6, 16(t2) +; RV32I-NEXT: lw a5, 20(t2) +; RV32I-NEXT: lw a7, 24(t2) ; RV32I-NEXT: lw t0, 28(t2) ; RV32I-NEXT: srli t1, a7, 24 ; RV32I-NEXT: srli t2, a7, 16 @@ -4695,13 +4692,13 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli s7, s7, 24 ; RV64I-NEXT: or s5, s6, s5 ; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: lbu s3, 4(a1) +; RV64I-NEXT: or s3, s7, s4 +; RV64I-NEXT: lbu s4, 4(a1) ; RV64I-NEXT: lbu s6, 5(a1) -; RV64I-NEXT: or s4, s7, s4 ; RV64I-NEXT: lbu s7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli s6, s6, 8 -; RV64I-NEXT: or s3, s6, s3 +; RV64I-NEXT: or s4, s6, s4 ; RV64I-NEXT: slli s7, s7, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, s7 @@ -4714,8 +4711,8 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: or a0, a0, t5 ; RV64I-NEXT: or t0, s0, t6 ; RV64I-NEXT: or t1, s5, s1 -; RV64I-NEXT: or t2, s4, s2 -; RV64I-NEXT: or a1, a1, s3 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: or a1, a1, s4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a6, a6, 32 ; RV64I-NEXT: slli a0, a0, 32 @@ -4739,21 +4736,21 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: andi a1, a1, 24 ; RV64I-NEXT: add a1, s6, a1 ; RV64I-NEXT: andi a0, a4, 56 +; RV64I-NEXT: xori a5, a0, 63 ; RV64I-NEXT: ld a3, 0(a1) -; RV64I-NEXT: ld a5, 8(a1) -; RV64I-NEXT: ld a6, 16(a1) -; RV64I-NEXT: xori a7, a0, 63 +; RV64I-NEXT: ld a6, 8(a1) +; RV64I-NEXT: ld a7, 16(a1) ; RV64I-NEXT: ld t0, 24(a1) -; RV64I-NEXT: srl a0, a5, a4 -; RV64I-NEXT: slli t1, a6, 1 +; RV64I-NEXT: srl a0, a6, a4 +; RV64I-NEXT: slli t1, a7, 1 ; RV64I-NEXT: srl a1, a3, a4 -; RV64I-NEXT: slli a5, a5, 1 -; RV64I-NEXT: srl a3, a6, a4 -; RV64I-NEXT: slli a6, t0, 1 +; RV64I-NEXT: slli a6, a6, 1 +; RV64I-NEXT: srl a3, a7, a4 +; RV64I-NEXT: slli a7, t0, 1 ; RV64I-NEXT: sra t0, t0, a4 -; RV64I-NEXT: sll a4, t1, a7 -; RV64I-NEXT: sll a5, a5, a7 -; RV64I-NEXT: sll a6, a6, a7 +; RV64I-NEXT: sll a4, t1, a5 +; RV64I-NEXT: sll a6, a6, a5 +; RV64I-NEXT: sll a5, a7, a5 ; RV64I-NEXT: srli a7, t0, 56 ; RV64I-NEXT: srli t1, t0, 48 ; RV64I-NEXT: srli t2, t0, 40 @@ -4762,8 +4759,8 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: srli t5, t0, 16 ; RV64I-NEXT: srli t6, t0, 8 ; RV64I-NEXT: or a4, a0, a4 -; RV64I-NEXT: or a5, a1, a5 -; RV64I-NEXT: or a6, a3, a6 +; RV64I-NEXT: or a6, a1, a6 +; RV64I-NEXT: or a5, a3, a5 ; RV64I-NEXT: sb t3, 28(a2) ; RV64I-NEXT: sb t2, 29(a2) ; RV64I-NEXT: sb t1, 30(a2) @@ -4772,20 +4769,20 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb t6, 25(a2) ; RV64I-NEXT: sb t5, 26(a2) ; RV64I-NEXT: sb t4, 27(a2) -; RV64I-NEXT: srli a7, a6, 56 -; RV64I-NEXT: srli t0, a6, 48 -; RV64I-NEXT: srli t1, a6, 40 -; RV64I-NEXT: srli t2, a6, 32 -; RV64I-NEXT: srli t3, a6, 24 -; RV64I-NEXT: srli t4, a6, 16 -; RV64I-NEXT: srli a6, a6, 8 -; RV64I-NEXT: srli t5, a5, 56 -; RV64I-NEXT: srli t6, a5, 48 -; RV64I-NEXT: srli s0, a5, 40 -; RV64I-NEXT: srli s1, a5, 32 -; RV64I-NEXT: srli s2, a5, 24 -; RV64I-NEXT: srli s3, a5, 16 +; RV64I-NEXT: srli a7, a5, 56 +; RV64I-NEXT: srli t0, a5, 48 +; RV64I-NEXT: srli t1, a5, 40 +; RV64I-NEXT: srli t2, a5, 32 +; RV64I-NEXT: srli t3, a5, 24 +; RV64I-NEXT: srli t4, a5, 16 ; RV64I-NEXT: srli a5, a5, 8 +; RV64I-NEXT: srli t5, a6, 56 +; RV64I-NEXT: srli t6, a6, 48 +; 
RV64I-NEXT: srli s0, a6, 40 +; RV64I-NEXT: srli s1, a6, 32 +; RV64I-NEXT: srli s2, a6, 24 +; RV64I-NEXT: srli s3, a6, 16 +; RV64I-NEXT: srli a6, a6, 8 ; RV64I-NEXT: srli s4, a4, 56 ; RV64I-NEXT: srli s5, a4, 48 ; RV64I-NEXT: srli s6, a4, 40 @@ -4795,7 +4792,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb a7, 23(a2) ; RV64I-NEXT: srli a7, a4, 32 ; RV64I-NEXT: sb a3, 16(a2) -; RV64I-NEXT: sb a6, 17(a2) +; RV64I-NEXT: sb a5, 17(a2) ; RV64I-NEXT: sb t4, 18(a2) ; RV64I-NEXT: sb t3, 19(a2) ; RV64I-NEXT: srli a3, a4, 24 @@ -4803,10 +4800,10 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb s0, 5(a2) ; RV64I-NEXT: sb t6, 6(a2) ; RV64I-NEXT: sb t5, 7(a2) -; RV64I-NEXT: srli a6, a4, 16 +; RV64I-NEXT: srli a5, a4, 16 ; RV64I-NEXT: srli a4, a4, 8 ; RV64I-NEXT: sb a1, 0(a2) -; RV64I-NEXT: sb a5, 1(a2) +; RV64I-NEXT: sb a6, 1(a2) ; RV64I-NEXT: sb s3, 2(a2) ; RV64I-NEXT: sb s2, 3(a2) ; RV64I-NEXT: sb a7, 12(a2) @@ -4815,7 +4812,7 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %byteOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: sb s4, 15(a2) ; RV64I-NEXT: sb a0, 8(a2) ; RV64I-NEXT: sb a4, 9(a2) -; RV64I-NEXT: sb a6, 10(a2) +; RV64I-NEXT: sb a5, 10(a2) ; RV64I-NEXT: sb a3, 11(a2) ; RV64I-NEXT: ld s0, 152(sp) # 8-byte Folded Reload ; RV64I-NEXT: ld s1, 144(sp) # 8-byte Folded Reload @@ -5175,13 +5172,13 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: slli s7, s7, 24 ; RV64I-NEXT: or s5, s6, s5 ; RV64I-NEXT: or s2, s3, s2 -; RV64I-NEXT: lbu s3, 4(a1) +; RV64I-NEXT: or s3, s7, s4 +; RV64I-NEXT: lbu s4, 4(a1) ; RV64I-NEXT: lbu s6, 5(a1) -; RV64I-NEXT: or s4, s7, s4 ; RV64I-NEXT: lbu s7, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli s6, s6, 8 -; RV64I-NEXT: or s3, s6, s3 +; RV64I-NEXT: or s4, s6, s4 ; RV64I-NEXT: slli s7, s7, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, s7 @@ -5194,8 +5191,8 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: or a0, a0, t5 ; RV64I-NEXT: or t0, s0, t6 ; RV64I-NEXT: or t1, s5, s1 -; RV64I-NEXT: or t2, s4, s2 -; RV64I-NEXT: or a1, a1, s3 +; RV64I-NEXT: or t2, s3, s2 +; RV64I-NEXT: or a1, a1, s4 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a6, a6, 32 ; RV64I-NEXT: slli a0, a0, 32 @@ -5220,24 +5217,24 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: andi a1, a1, 24 ; RV64I-NEXT: andi a0, a3, 32 ; RV64I-NEXT: add a1, s6, a1 -; RV64I-NEXT: ld a4, 0(a1) -; RV64I-NEXT: ld a5, 8(a1) -; RV64I-NEXT: ld a6, 16(a1) -; RV64I-NEXT: xori a7, a0, 63 +; RV64I-NEXT: xori a4, a0, 63 +; RV64I-NEXT: ld a5, 0(a1) +; RV64I-NEXT: ld a6, 8(a1) +; RV64I-NEXT: ld a7, 16(a1) ; RV64I-NEXT: ld t0, 24(a1) -; RV64I-NEXT: srl a0, a5, a3 -; RV64I-NEXT: slli t1, a6, 1 -; RV64I-NEXT: srl a1, a4, a3 -; RV64I-NEXT: slli a5, a5, 1 -; RV64I-NEXT: srl a4, a6, a3 -; RV64I-NEXT: slli a6, t0, 1 +; RV64I-NEXT: srl a0, a6, a3 +; RV64I-NEXT: slli t1, a7, 1 +; RV64I-NEXT: srl a1, a5, a3 +; RV64I-NEXT: slli a6, a6, 1 +; RV64I-NEXT: srl a5, a7, a3 +; RV64I-NEXT: slli a7, t0, 1 ; RV64I-NEXT: sra a3, t0, a3 -; RV64I-NEXT: sll t0, t1, a7 -; RV64I-NEXT: sll a5, a5, a7 -; RV64I-NEXT: sll a6, a6, a7 -; RV64I-NEXT: srli a7, a4, 24 -; RV64I-NEXT: srli t1, a4, 16 -; RV64I-NEXT: srli t2, a4, 8 +; RV64I-NEXT: sll t0, t1, a4 +; RV64I-NEXT: sll a6, a6, a4 +; RV64I-NEXT: sll a4, a7, a4 +; RV64I-NEXT: srli a7, a5, 24 +; RV64I-NEXT: srli t1, a5, 16 +; RV64I-NEXT: srli t2, a5, 8 ; 
RV64I-NEXT: srli t3, a3, 56 ; RV64I-NEXT: srli t4, a3, 48 ; RV64I-NEXT: srli t5, a3, 40 @@ -5249,19 +5246,19 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: srli s4, a1, 16 ; RV64I-NEXT: srli s5, a1, 8 ; RV64I-NEXT: srli s6, a0, 24 -; RV64I-NEXT: or a6, a4, a6 -; RV64I-NEXT: sb a4, 16(a2) +; RV64I-NEXT: or a4, a5, a4 +; RV64I-NEXT: sb a5, 16(a2) ; RV64I-NEXT: sb t2, 17(a2) ; RV64I-NEXT: sb t1, 18(a2) ; RV64I-NEXT: sb a7, 19(a2) -; RV64I-NEXT: srli a4, a0, 16 +; RV64I-NEXT: srli a5, a0, 16 ; RV64I-NEXT: sb t6, 28(a2) ; RV64I-NEXT: sb t5, 29(a2) ; RV64I-NEXT: sb t4, 30(a2) ; RV64I-NEXT: sb t3, 31(a2) ; RV64I-NEXT: srli a7, a0, 8 ; RV64I-NEXT: or t0, a0, t0 -; RV64I-NEXT: or a5, a1, a5 +; RV64I-NEXT: or a6, a1, a6 ; RV64I-NEXT: sb a3, 24(a2) ; RV64I-NEXT: sb s2, 25(a2) ; RV64I-NEXT: sb s1, 26(a2) @@ -5272,16 +5269,16 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: sb s3, 3(a2) ; RV64I-NEXT: sb a0, 8(a2) ; RV64I-NEXT: sb a7, 9(a2) -; RV64I-NEXT: sb a4, 10(a2) +; RV64I-NEXT: sb a5, 10(a2) ; RV64I-NEXT: sb s6, 11(a2) -; RV64I-NEXT: srli a0, a6, 56 -; RV64I-NEXT: srli a1, a6, 48 -; RV64I-NEXT: srli a3, a6, 40 -; RV64I-NEXT: srli a4, a6, 32 -; RV64I-NEXT: srli a6, a5, 56 -; RV64I-NEXT: srli a7, a5, 48 -; RV64I-NEXT: srli t1, a5, 40 -; RV64I-NEXT: srli a5, a5, 32 +; RV64I-NEXT: srli a0, a4, 56 +; RV64I-NEXT: srli a1, a4, 48 +; RV64I-NEXT: srli a3, a4, 40 +; RV64I-NEXT: srli a4, a4, 32 +; RV64I-NEXT: srli a5, a6, 56 +; RV64I-NEXT: srli a7, a6, 48 +; RV64I-NEXT: srli t1, a6, 40 +; RV64I-NEXT: srli a6, a6, 32 ; RV64I-NEXT: srli t2, t0, 56 ; RV64I-NEXT: srli t3, t0, 48 ; RV64I-NEXT: srli t4, t0, 40 @@ -5290,10 +5287,10 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV64I-NEXT: sb a3, 21(a2) ; RV64I-NEXT: sb a1, 22(a2) ; RV64I-NEXT: sb a0, 23(a2) -; RV64I-NEXT: sb a5, 4(a2) +; RV64I-NEXT: sb a6, 4(a2) ; RV64I-NEXT: sb t1, 5(a2) ; RV64I-NEXT: sb a7, 6(a2) -; RV64I-NEXT: sb a6, 7(a2) +; RV64I-NEXT: sb a5, 7(a2) ; RV64I-NEXT: sb t0, 12(a2) ; RV64I-NEXT: sb t4, 13(a2) ; RV64I-NEXT: sb t3, 14(a2) @@ -5431,13 +5428,13 @@ define void @ashr_32bytes_wordOff(ptr %src.ptr, ptr %wordOff.ptr, ptr %dst) noun ; RV32I-NEXT: sw t0, 12(sp) ; RV32I-NEXT: sw t1, 16(sp) ; RV32I-NEXT: sw a5, 20(sp) -; RV32I-NEXT: lw a6, 16(s6) -; RV32I-NEXT: lw a5, 20(s6) -; RV32I-NEXT: lw a7, 24(s6) ; RV32I-NEXT: lw a1, 0(s6) ; RV32I-NEXT: lw a0, 4(s6) ; RV32I-NEXT: lw a4, 8(s6) ; RV32I-NEXT: lw a3, 12(s6) +; RV32I-NEXT: lw a6, 16(s6) +; RV32I-NEXT: lw a5, 20(s6) +; RV32I-NEXT: lw a7, 24(s6) ; RV32I-NEXT: lw t0, 28(s6) ; RV32I-NEXT: srli t1, a7, 24 ; RV32I-NEXT: srli t2, a7, 16 @@ -5636,9 +5633,9 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV64I-NEXT: sd a7, 8(sp) ; RV64I-NEXT: sd a3, 16(sp) ; RV64I-NEXT: sd a1, 24(sp) -; RV64I-NEXT: ld a4, 16(t5) -; RV64I-NEXT: ld a0, 8(t5) ; RV64I-NEXT: ld a1, 0(t5) +; RV64I-NEXT: ld a0, 8(t5) +; RV64I-NEXT: ld a4, 16(t5) ; RV64I-NEXT: ld a3, 24(t5) ; RV64I-NEXT: srli a5, a4, 56 ; RV64I-NEXT: srli a6, a4, 48 @@ -5833,13 +5830,13 @@ define void @ashr_32bytes_dwordOff(ptr %src.ptr, ptr %dwordOff.ptr, ptr %dst) no ; RV32I-NEXT: sw t0, 12(sp) ; RV32I-NEXT: sw t1, 16(sp) ; RV32I-NEXT: sw a5, 20(sp) -; RV32I-NEXT: lw a6, 16(s6) -; RV32I-NEXT: lw a5, 20(s6) -; RV32I-NEXT: lw a7, 24(s6) ; RV32I-NEXT: lw a1, 0(s6) ; RV32I-NEXT: lw a0, 4(s6) ; RV32I-NEXT: lw a4, 8(s6) ; RV32I-NEXT: lw a3, 12(s6) +; RV32I-NEXT: lw a6, 16(s6) +; 
RV32I-NEXT: lw a5, 20(s6) +; RV32I-NEXT: lw a7, 24(s6) ; RV32I-NEXT: lw t0, 28(s6) ; RV32I-NEXT: srli t1, a7, 24 ; RV32I-NEXT: srli t2, a7, 16 diff --git a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll index b2c130c2d7c10..f02ffa8951ad7 100644 --- a/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll +++ b/llvm/test/CodeGen/RISCV/wide-scalar-shift-legalization.ll @@ -5,12 +5,12 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: lshr_4bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) ; RV64I-NEXT: lb a0, 3(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a0, a0, 24 @@ -28,26 +28,26 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: lshr_4bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 1(a0) -; RV32I-NEXT: lbu a4, 2(a0) -; RV32I-NEXT: lbu a5, 3(a0) -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli a5, a5, 24 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: lbu a3, 0(a1) -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: srl a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 16 ; RV32I-NEXT: srli a3, a0, 24 @@ -66,12 +66,12 @@ define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: shl_4bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) ; RV64I-NEXT: lb a0, 3(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a0, a0, 24 @@ -89,26 +89,26 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: shl_4bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 1(a0) -; RV32I-NEXT: lbu a4, 2(a0) -; RV32I-NEXT: lbu a5, 3(a0) -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli a5, a5, 24 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: lbu a3, 0(a1) -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; 
RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: sll a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 16 ; RV32I-NEXT: srli a3, a0, 24 @@ -127,12 +127,12 @@ define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-LABEL: ashr_4bytes: ; RV64I: # %bb.0: -; RV64I-NEXT: lbu a3, 1(a0) -; RV64I-NEXT: lbu a4, 0(a0) +; RV64I-NEXT: lbu a3, 0(a0) +; RV64I-NEXT: lbu a4, 1(a0) ; RV64I-NEXT: lbu a5, 2(a0) ; RV64I-NEXT: lb a0, 3(a0) -; RV64I-NEXT: slli a3, a3, 8 -; RV64I-NEXT: or a3, a3, a4 +; RV64I-NEXT: slli a4, a4, 8 +; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: lbu a1, 0(a1) ; RV64I-NEXT: slli a5, a5, 16 ; RV64I-NEXT: slli a0, a0, 24 @@ -150,26 +150,26 @@ define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: ashr_4bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 1(a0) -; RV32I-NEXT: lbu a4, 2(a0) -; RV32I-NEXT: lbu a5, 3(a0) -; RV32I-NEXT: lbu a0, 0(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli a5, a5, 24 -; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: lbu a3, 0(a1) -; RV32I-NEXT: lbu a6, 1(a1) -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a3, a6, a3 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a0, 3(a0) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a0, a0, a5 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a5, 1(a1) +; RV32I-NEXT: lbu a6, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a5, a5, 8 +; RV32I-NEXT: or a4, a5, a4 +; RV32I-NEXT: slli a6, a6, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: or a0, a4, a0 -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a1, a1, a6 +; RV32I-NEXT: or a0, a0, a3 +; RV32I-NEXT: or a1, a1, a4 ; RV32I-NEXT: sra a0, a0, a1 ; RV32I-NEXT: srli a1, a0, 16 ; RV32I-NEXT: srli a3, a0, 24 @@ -215,20 +215,20 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t2, t2, 24 ; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: or a7, t2, t0 +; RV64I-NEXT: lbu t0, 4(a1) ; RV64I-NEXT: lbu t1, 5(a1) -; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: lbu t2, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or a7, t1, a7 +; RV64I-NEXT: or t0, t1, t0 ; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t2 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: or a4, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a4, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a0, a0, a3 @@ -253,39 +253,39 @@ define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr 
%dst) nounwind { ; ; RV32I-LABEL: lshr_8bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 5(a0) -; RV32I-NEXT: lbu a4, 6(a0) -; RV32I-NEXT: lbu a5, 7(a0) -; RV32I-NEXT: lbu a6, 4(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli a5, a5, 24 -; RV32I-NEXT: or a3, a3, a6 -; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: lbu a3, 4(a0) +; RV32I-NEXT: lbu a4, 5(a0) +; RV32I-NEXT: lbu a5, 6(a0) +; RV32I-NEXT: lbu a6, 7(a0) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a6, 1(a1) +; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: lbu a1, 3(a1) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a4, a6, a4 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: or a5, a4, a3 -; RV32I-NEXT: or a4, a1, a6 -; RV32I-NEXT: addi a3, a4, -32 +; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: or a4, a1, a4 ; RV32I-NEXT: srl a1, a5, a4 +; RV32I-NEXT: addi a3, a4, -32 ; RV32I-NEXT: bltz a3, .LBB3_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: j .LBB3_3 ; RV32I-NEXT: .LBB3_2: -; RV32I-NEXT: lbu a6, 1(a0) -; RV32I-NEXT: lbu a7, 0(a0) +; RV32I-NEXT: lbu a6, 0(a0) +; RV32I-NEXT: lbu a7, 1(a0) ; RV32I-NEXT: lbu t0, 2(a0) ; RV32I-NEXT: lbu a0, 3(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 ; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or a0, a0, t0 @@ -348,20 +348,20 @@ define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t2, t2, 24 ; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: or a7, t2, t0 +; RV64I-NEXT: lbu t0, 4(a1) ; RV64I-NEXT: lbu t1, 5(a1) -; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: lbu t2, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or a7, t1, a7 +; RV64I-NEXT: or t0, t1, t0 ; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t2 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: or a4, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a4, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a0, a0, a3 @@ -386,39 +386,39 @@ define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: shl_8bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 1(a0) -; RV32I-NEXT: lbu a4, 2(a0) -; RV32I-NEXT: lbu a5, 3(a0) -; RV32I-NEXT: lbu a6, 0(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: slli a4, a4, 16 -; RV32I-NEXT: slli a5, a5, 24 -; RV32I-NEXT: or a3, a3, a6 -; RV32I-NEXT: lbu a6, 0(a1) -; RV32I-NEXT: lbu a7, 1(a1) -; RV32I-NEXT: or a4, a5, a4 -; RV32I-NEXT: lbu a5, 2(a1) -; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli a7, a7, 8 -; RV32I-NEXT: or a6, a7, a6 +; RV32I-NEXT: lbu a3, 0(a0) +; RV32I-NEXT: lbu a4, 1(a0) +; RV32I-NEXT: lbu a5, 2(a0) +; RV32I-NEXT: lbu a6, 3(a0) +; RV32I-NEXT: slli a4, a4, 8 ; RV32I-NEXT: slli a5, a5, 16 +; RV32I-NEXT: slli a6, a6, 24 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a5, a6, a5 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a6, 1(a1) +; RV32I-NEXT: lbu a7, 2(a1) +; RV32I-NEXT: lbu 
a1, 3(a1) +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a4, a6, a4 +; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a5 -; RV32I-NEXT: or a5, a4, a3 -; RV32I-NEXT: or a4, a1, a6 -; RV32I-NEXT: addi a3, a4, -32 +; RV32I-NEXT: or a1, a1, a7 +; RV32I-NEXT: or a5, a5, a3 +; RV32I-NEXT: or a4, a1, a4 ; RV32I-NEXT: sll a1, a5, a4 +; RV32I-NEXT: addi a3, a4, -32 ; RV32I-NEXT: bltz a3, .LBB4_2 ; RV32I-NEXT: # %bb.1: ; RV32I-NEXT: mv a0, a1 ; RV32I-NEXT: j .LBB4_3 ; RV32I-NEXT: .LBB4_2: -; RV32I-NEXT: lbu a6, 5(a0) -; RV32I-NEXT: lbu a7, 4(a0) +; RV32I-NEXT: lbu a6, 4(a0) +; RV32I-NEXT: lbu a7, 5(a0) ; RV32I-NEXT: lbu t0, 6(a0) ; RV32I-NEXT: lbu a0, 7(a0) -; RV32I-NEXT: slli a6, a6, 8 -; RV32I-NEXT: or a6, a6, a7 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a6, a7, a6 ; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or a0, a0, t0 @@ -481,20 +481,20 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t2, t2, 24 ; RV64I-NEXT: or a0, a0, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: or a7, t2, t0 +; RV64I-NEXT: lbu t0, 4(a1) ; RV64I-NEXT: lbu t1, 5(a1) -; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: lbu t2, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli t1, t1, 8 -; RV64I-NEXT: or a7, t1, a7 +; RV64I-NEXT: or t0, t1, t0 ; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t2 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a0, a0, a5 -; RV64I-NEXT: or a4, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a4, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a0, a0, a3 @@ -519,41 +519,40 @@ define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; ; RV32I-LABEL: ashr_8bytes: ; RV32I: # %bb.0: -; RV32I-NEXT: lbu a3, 5(a0) -; RV32I-NEXT: lbu a4, 4(a0) +; RV32I-NEXT: lbu a3, 4(a0) +; RV32I-NEXT: lbu a4, 5(a0) ; RV32I-NEXT: lbu a5, 6(a0) ; RV32I-NEXT: lbu a6, 7(a0) -; RV32I-NEXT: slli a3, a3, 8 -; RV32I-NEXT: lbu a7, 0(a1) -; RV32I-NEXT: lbu t0, 1(a1) -; RV32I-NEXT: or a3, a3, a4 -; RV32I-NEXT: lbu a4, 2(a1) +; RV32I-NEXT: slli a4, a4, 8 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: lbu a4, 0(a1) +; RV32I-NEXT: lbu a7, 1(a1) +; RV32I-NEXT: lbu t0, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t0, t0, 8 -; RV32I-NEXT: or a7, t0, a7 -; RV32I-NEXT: slli a4, a4, 16 +; RV32I-NEXT: slli a7, a7, 8 +; RV32I-NEXT: or a7, a7, a4 +; RV32I-NEXT: slli t0, t0, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, a4 +; RV32I-NEXT: or a1, a1, t0 ; RV32I-NEXT: slli a4, a5, 16 ; RV32I-NEXT: slli a5, a6, 24 ; RV32I-NEXT: or a4, a5, a4 ; RV32I-NEXT: or a4, a4, a3 ; RV32I-NEXT: or a3, a1, a7 -; RV32I-NEXT: addi a6, a3, -32 ; RV32I-NEXT: sra a1, a4, a3 +; RV32I-NEXT: addi a6, a3, -32 ; RV32I-NEXT: bltz a6, .LBB5_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: srai a5, a5, 31 ; RV32I-NEXT: mv a0, a1 -; RV32I-NEXT: mv a1, a5 +; RV32I-NEXT: srai a1, a5, 31 ; RV32I-NEXT: j .LBB5_3 ; RV32I-NEXT: .LBB5_2: -; RV32I-NEXT: lbu a5, 1(a0) -; RV32I-NEXT: lbu a6, 0(a0) +; RV32I-NEXT: lbu a5, 0(a0) +; RV32I-NEXT: lbu a6, 1(a0) ; RV32I-NEXT: lbu a7, 2(a0) ; RV32I-NEXT: lbu a0, 3(a0) -; RV32I-NEXT: slli a5, a5, 8 -; RV32I-NEXT: or a5, a5, a6 +; RV32I-NEXT: slli a6, a6, 8 +; RV32I-NEXT: or a5, a6, a5 ; RV32I-NEXT: slli a7, a7, 16 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or a0, a0, a7 @@ -615,53 +614,53 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { 
; RV64I-NEXT: slli t3, t3, 24 ; RV64I-NEXT: or t1, t2, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: or a7, t3, t0 +; RV64I-NEXT: lbu t0, 4(a1) ; RV64I-NEXT: lbu t2, 5(a1) -; RV64I-NEXT: or t0, t3, t0 ; RV64I-NEXT: lbu t3, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or a7, t2, a7 +; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: slli t3, t3, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t3 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, t1, a5 -; RV64I-NEXT: or a6, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a5, a4, a3 ; RV64I-NEXT: or a4, a1, a6 -; RV64I-NEXT: addi a3, a4, -64 ; RV64I-NEXT: srl a1, a5, a4 +; RV64I-NEXT: addi a3, a4, -64 ; RV64I-NEXT: bltz a3, .LBB6_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: j .LBB6_3 ; RV64I-NEXT: .LBB6_2: -; RV64I-NEXT: lbu a6, 1(a0) -; RV64I-NEXT: lbu a7, 2(a0) -; RV64I-NEXT: lbu t0, 3(a0) -; RV64I-NEXT: lbu t1, 0(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: lbu t1, 4(a0) -; RV64I-NEXT: lbu t2, 5(a0) -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu t0, 6(a0) -; RV64I-NEXT: lbu a0, 7(a0) -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: lbu a6, 0(a0) +; RV64I-NEXT: lbu a7, 1(a0) +; RV64I-NEXT: lbu t0, 2(a0) +; RV64I-NEXT: lbu t1, 3(a0) +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: lbu t0, 4(a0) +; RV64I-NEXT: lbu t1, 5(a0) +; RV64I-NEXT: lbu t2, 6(a0) +; RV64I-NEXT: lbu a0, 7(a0) +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a0, a0, t2 ; RV64I-NEXT: or a6, a7, a6 ; RV64I-NEXT: not a7, a4 ; RV64I-NEXT: slli a5, a5, 1 -; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: srl a0, a0, a4 @@ -740,20 +739,20 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or t3, t4, t3 ; RV32I-NEXT: or a6, t1, a6 -; RV32I-NEXT: lbu t1, 0(a1) -; RV32I-NEXT: lbu t4, 1(a1) ; RV32I-NEXT: or a0, a0, t2 -; RV32I-NEXT: lbu t2, 2(a1) +; RV32I-NEXT: lbu t1, 0(a1) +; RV32I-NEXT: lbu t2, 1(a1) +; RV32I-NEXT: lbu t4, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: or t1, t4, t1 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 ; RV32I-NEXT: sw zero, 16(sp) ; RV32I-NEXT: sw zero, 20(sp) ; RV32I-NEXT: sw zero, 24(sp) ; RV32I-NEXT: sw zero, 28(sp) -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t4, t4, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t2 +; RV32I-NEXT: or a1, a1, t4 ; RV32I-NEXT: mv t2, sp ; RV32I-NEXT: or a3, a4, a3 ; RV32I-NEXT: or a4, t0, a7 @@ -767,11 +766,11 @@ define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: srli a0, a1, 3 ; RV32I-NEXT: andi a3, a1, 31 ; RV32I-NEXT: andi a0, a0, 12 +; RV32I-NEXT: xori a3, a3, 31 ; RV32I-NEXT: add a0, t2, a0 ; RV32I-NEXT: lw a4, 0(a0) ; RV32I-NEXT: lw a5, 4(a0) ; RV32I-NEXT: lw a6, 8(a0) -; RV32I-NEXT: xori a3, a3, 31 ; RV32I-NEXT: lw a0, 12(a0) ; RV32I-NEXT: srl a7, a5, a1 ; RV32I-NEXT: slli t0, a6, 1 @@ -851,53 +850,53 @@ define 
void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV64I-NEXT: slli t3, t3, 24 ; RV64I-NEXT: or t1, t2, t1 ; RV64I-NEXT: or a6, a7, a6 -; RV64I-NEXT: lbu a7, 4(a1) +; RV64I-NEXT: or a7, t3, t0 +; RV64I-NEXT: lbu t0, 4(a1) ; RV64I-NEXT: lbu t2, 5(a1) -; RV64I-NEXT: or t0, t3, t0 ; RV64I-NEXT: lbu t3, 6(a1) ; RV64I-NEXT: lbu a1, 7(a1) ; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or a7, t2, a7 +; RV64I-NEXT: or t0, t2, t0 ; RV64I-NEXT: slli t3, t3, 16 ; RV64I-NEXT: slli a1, a1, 24 ; RV64I-NEXT: or a1, a1, t3 ; RV64I-NEXT: or a3, a4, a3 ; RV64I-NEXT: or a4, t1, a5 -; RV64I-NEXT: or a6, t0, a6 -; RV64I-NEXT: or a1, a1, a7 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a1, a1, t0 ; RV64I-NEXT: slli a4, a4, 32 ; RV64I-NEXT: slli a1, a1, 32 ; RV64I-NEXT: or a5, a4, a3 ; RV64I-NEXT: or a4, a1, a6 -; RV64I-NEXT: addi a3, a4, -64 ; RV64I-NEXT: sll a1, a5, a4 +; RV64I-NEXT: addi a3, a4, -64 ; RV64I-NEXT: bltz a3, .LBB7_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: j .LBB7_3 ; RV64I-NEXT: .LBB7_2: -; RV64I-NEXT: lbu a6, 9(a0) -; RV64I-NEXT: lbu a7, 10(a0) -; RV64I-NEXT: lbu t0, 11(a0) -; RV64I-NEXT: lbu t1, 8(a0) -; RV64I-NEXT: slli a6, a6, 8 -; RV64I-NEXT: slli a7, a7, 16 -; RV64I-NEXT: slli t0, t0, 24 -; RV64I-NEXT: or a6, a6, t1 -; RV64I-NEXT: lbu t1, 12(a0) -; RV64I-NEXT: lbu t2, 13(a0) -; RV64I-NEXT: or a7, t0, a7 -; RV64I-NEXT: lbu t0, 14(a0) -; RV64I-NEXT: lbu a0, 15(a0) -; RV64I-NEXT: slli t2, t2, 8 -; RV64I-NEXT: or t1, t2, t1 +; RV64I-NEXT: lbu a6, 8(a0) +; RV64I-NEXT: lbu a7, 9(a0) +; RV64I-NEXT: lbu t0, 10(a0) +; RV64I-NEXT: lbu t1, 11(a0) +; RV64I-NEXT: slli a7, a7, 8 ; RV64I-NEXT: slli t0, t0, 16 +; RV64I-NEXT: slli t1, t1, 24 +; RV64I-NEXT: or a6, a7, a6 +; RV64I-NEXT: or a7, t1, t0 +; RV64I-NEXT: lbu t0, 12(a0) +; RV64I-NEXT: lbu t1, 13(a0) +; RV64I-NEXT: lbu t2, 14(a0) +; RV64I-NEXT: lbu a0, 15(a0) +; RV64I-NEXT: slli t1, t1, 8 +; RV64I-NEXT: or t0, t1, t0 +; RV64I-NEXT: slli t2, t2, 16 ; RV64I-NEXT: slli a0, a0, 24 -; RV64I-NEXT: or a0, a0, t0 +; RV64I-NEXT: or a0, a0, t2 ; RV64I-NEXT: or a6, a7, a6 ; RV64I-NEXT: not a7, a4 ; RV64I-NEXT: srli a5, a5, 1 -; RV64I-NEXT: or a0, a0, t1 +; RV64I-NEXT: or a0, a0, t0 ; RV64I-NEXT: slli a0, a0, 32 ; RV64I-NEXT: or a0, a0, a6 ; RV64I-NEXT: sll a0, a0, a4 @@ -976,20 +975,20 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or t3, t4, t3 ; RV32I-NEXT: or a6, t1, a6 -; RV32I-NEXT: lbu t1, 0(a1) -; RV32I-NEXT: lbu t4, 1(a1) ; RV32I-NEXT: or a0, a0, t2 -; RV32I-NEXT: lbu t2, 2(a1) +; RV32I-NEXT: lbu t1, 0(a1) +; RV32I-NEXT: lbu t2, 1(a1) +; RV32I-NEXT: lbu t4, 2(a1) ; RV32I-NEXT: lbu a1, 3(a1) -; RV32I-NEXT: slli t4, t4, 8 -; RV32I-NEXT: or t1, t4, t1 +; RV32I-NEXT: slli t2, t2, 8 +; RV32I-NEXT: or t1, t2, t1 ; RV32I-NEXT: sw zero, 0(sp) ; RV32I-NEXT: sw zero, 4(sp) ; RV32I-NEXT: sw zero, 8(sp) ; RV32I-NEXT: sw zero, 12(sp) -; RV32I-NEXT: slli t2, t2, 16 +; RV32I-NEXT: slli t4, t4, 16 ; RV32I-NEXT: slli a1, a1, 24 -; RV32I-NEXT: or a1, a1, t2 +; RV32I-NEXT: or a1, a1, t4 ; RV32I-NEXT: addi t2, sp, 16 ; RV32I-NEXT: or a3, a4, a3 ; RV32I-NEXT: or a4, t0, a7 @@ -1003,12 +1002,12 @@ define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind { ; RV32I-NEXT: srli a0, a1, 3 ; RV32I-NEXT: andi a3, a1, 31 ; RV32I-NEXT: andi a0, a0, 12 +; RV32I-NEXT: xori a3, a3, 31 ; RV32I-NEXT: sub a0, t2, a0 ; RV32I-NEXT: lw a4, 0(a0) ; RV32I-NEXT: lw a5, 4(a0) ; RV32I-NEXT: lw a6, 8(a0) ; RV32I-NEXT: lw a0, 12(a0) -; RV32I-NEXT: xori a3, a3, 31 ; 
RV32I-NEXT: sll a7, a5, a1
; RV32I-NEXT: srli t0, a4, 1
; RV32I-NEXT: sll a0, a0, a1
@@ -1087,55 +1086,54 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli t3, t3, 24
; RV64I-NEXT: or t1, t2, t1
; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 4(a1)
+; RV64I-NEXT: or a7, t3, t0
+; RV64I-NEXT: lbu t0, 4(a1)
; RV64I-NEXT: lbu t2, 5(a1)
-; RV64I-NEXT: or t0, t3, t0
; RV64I-NEXT: lbu t3, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
; RV64I-NEXT: slli t2, t2, 8
-; RV64I-NEXT: or a7, t2, a7
+; RV64I-NEXT: or t0, t2, t0
; RV64I-NEXT: slli t3, t3, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, t3
; RV64I-NEXT: or a3, a4, a3
; RV64I-NEXT: or a5, t1, a5
-; RV64I-NEXT: or a6, t0, a6
-; RV64I-NEXT: or a1, a1, a7
+; RV64I-NEXT: or a6, a7, a6
+; RV64I-NEXT: or a1, a1, t0
; RV64I-NEXT: slli a4, a5, 32
; RV64I-NEXT: slli a1, a1, 32
; RV64I-NEXT: or a4, a4, a3
; RV64I-NEXT: or a3, a1, a6
-; RV64I-NEXT: addi a6, a3, -64
; RV64I-NEXT: sra a1, a4, a3
+; RV64I-NEXT: addi a6, a3, -64
; RV64I-NEXT: bltz a6, .LBB8_2
; RV64I-NEXT: # %bb.1:
-; RV64I-NEXT: sraiw a3, a5, 31
; RV64I-NEXT: mv a0, a1
-; RV64I-NEXT: mv a1, a3
+; RV64I-NEXT: sraiw a1, a5, 31
; RV64I-NEXT: j .LBB8_3
; RV64I-NEXT: .LBB8_2:
-; RV64I-NEXT: lbu a5, 1(a0)
-; RV64I-NEXT: lbu a6, 2(a0)
-; RV64I-NEXT: lbu a7, 3(a0)
-; RV64I-NEXT: lbu t0, 0(a0)
-; RV64I-NEXT: slli a5, a5, 8
-; RV64I-NEXT: slli a6, a6, 16
-; RV64I-NEXT: slli a7, a7, 24
-; RV64I-NEXT: or a5, a5, t0
-; RV64I-NEXT: lbu t0, 4(a0)
-; RV64I-NEXT: lbu t1, 5(a0)
-; RV64I-NEXT: or a6, a7, a6
-; RV64I-NEXT: lbu a7, 6(a0)
-; RV64I-NEXT: lbu a0, 7(a0)
-; RV64I-NEXT: slli t1, t1, 8
-; RV64I-NEXT: or t0, t1, t0
+; RV64I-NEXT: lbu a5, 0(a0)
+; RV64I-NEXT: lbu a6, 1(a0)
+; RV64I-NEXT: lbu a7, 2(a0)
+; RV64I-NEXT: lbu t0, 3(a0)
+; RV64I-NEXT: slli a6, a6, 8
; RV64I-NEXT: slli a7, a7, 16
+; RV64I-NEXT: slli t0, t0, 24
+; RV64I-NEXT: or a5, a6, a5
+; RV64I-NEXT: or a6, t0, a7
+; RV64I-NEXT: lbu a7, 4(a0)
+; RV64I-NEXT: lbu t0, 5(a0)
+; RV64I-NEXT: lbu t1, 6(a0)
+; RV64I-NEXT: lbu a0, 7(a0)
+; RV64I-NEXT: slli t0, t0, 8
+; RV64I-NEXT: or a7, t0, a7
+; RV64I-NEXT: slli t1, t1, 16
; RV64I-NEXT: slli a0, a0, 24
-; RV64I-NEXT: or a0, a0, a7
+; RV64I-NEXT: or a0, a0, t1
; RV64I-NEXT: or a5, a6, a5
; RV64I-NEXT: not a6, a3
; RV64I-NEXT: slli a4, a4, 1
-; RV64I-NEXT: or a0, a0, t0
+; RV64I-NEXT: or a0, a0, a7
; RV64I-NEXT: slli a0, a0, 32
; RV64I-NEXT: or a0, a0, a5
; RV64I-NEXT: srl a0, a0, a3
@@ -1209,26 +1207,26 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: slli t1, t1, 8
; RV32I-NEXT: or a4, t3, a4
; RV32I-NEXT: or t3, t5, t4
-; RV32I-NEXT: lbu t4, 0(a1)
-; RV32I-NEXT: lbu t5, 1(a1)
; RV32I-NEXT: or t0, t1, t0
-; RV32I-NEXT: lbu t1, 2(a1)
+; RV32I-NEXT: lbu t1, 0(a1)
+; RV32I-NEXT: lbu t4, 1(a1)
+; RV32I-NEXT: lbu t5, 2(a1)
; RV32I-NEXT: lbu a1, 3(a1)
-; RV32I-NEXT: slli t5, t5, 8
-; RV32I-NEXT: or t4, t5, t4
-; RV32I-NEXT: slli t1, t1, 16
+; RV32I-NEXT: slli t4, t4, 8
+; RV32I-NEXT: or t1, t4, t1
+; RV32I-NEXT: slli t5, t5, 16
; RV32I-NEXT: slli a1, a1, 24
-; RV32I-NEXT: or a1, a1, t1
+; RV32I-NEXT: or a1, a1, t5
; RV32I-NEXT: or a3, a5, a3
; RV32I-NEXT: mv a5, sp
; RV32I-NEXT: slli t2, t2, 16
; RV32I-NEXT: slli a0, a0, 24
-; RV32I-NEXT: or t1, a0, t2
+; RV32I-NEXT: or t2, a0, t2
; RV32I-NEXT: srai a0, a0, 31
; RV32I-NEXT: or a6, a7, a6
; RV32I-NEXT: or a4, t3, a4
-; RV32I-NEXT: or a7, t1, t0
-; RV32I-NEXT: or a1, a1, t4
+; RV32I-NEXT: or a7, t2, t0
+; RV32I-NEXT: or a1, a1, t1
; RV32I-NEXT: sw a0, 16(sp)
; RV32I-NEXT: sw a0, 20(sp)
; RV32I-NEXT: sw a0, 24(sp)
@@ -1240,11 +1238,11 @@ define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV32I-NEXT: srli a0, a1, 3
; RV32I-NEXT: andi a3, a1, 31
; RV32I-NEXT: andi a0, a0, 12
+; RV32I-NEXT: xori a3, a3, 31
; RV32I-NEXT: add a0, a5, a0
; RV32I-NEXT: lw a4, 0(a0)
; RV32I-NEXT: lw a5, 4(a0)
; RV32I-NEXT: lw a6, 8(a0)
-; RV32I-NEXT: xori a3, a3, 31
; RV32I-NEXT: lw a0, 12(a0)
; RV32I-NEXT: srl a7, a5, a1
; RV32I-NEXT: slli t0, a6, 1
@@ -1392,13 +1390,13 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: or s5, s6, s5
; RV64I-NEXT: or s2, s3, s2
-; RV64I-NEXT: lbu s3, 4(a1)
+; RV64I-NEXT: or s3, s7, s4
+; RV64I-NEXT: lbu s4, 4(a1)
; RV64I-NEXT: lbu s6, 5(a1)
-; RV64I-NEXT: or s4, s7, s4
; RV64I-NEXT: lbu s7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
; RV64I-NEXT: slli s6, s6, 8
-; RV64I-NEXT: or s3, s6, s3
+; RV64I-NEXT: or s4, s6, s4
; RV64I-NEXT: slli s7, s7, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, s7
@@ -1415,8 +1413,8 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a0, a0, t5
; RV64I-NEXT: or t1, s0, t6
; RV64I-NEXT: or t2, s5, s1
-; RV64I-NEXT: or t3, s4, s2
-; RV64I-NEXT: or a1, a1, s3
+; RV64I-NEXT: or t3, s3, s2
+; RV64I-NEXT: or a1, a1, s4
; RV64I-NEXT: slli a3, a3, 32
; RV64I-NEXT: slli a7, a7, 32
; RV64I-NEXT: slli a0, a0, 32
@@ -1434,11 +1432,11 @@ define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: srli a0, a1, 3
; RV64I-NEXT: andi a3, a1, 63
; RV64I-NEXT: andi a0, a0, 24
+; RV64I-NEXT: xori a3, a3, 63
; RV64I-NEXT: add a0, a6, a0
; RV64I-NEXT: ld a4, 0(a0)
; RV64I-NEXT: ld a5, 8(a0)
; RV64I-NEXT: ld a6, 16(a0)
-; RV64I-NEXT: xori a3, a3, 63
; RV64I-NEXT: ld a0, 24(a0)
; RV64I-NEXT: srl a7, a5, a1
; RV64I-NEXT: slli t0, a6, 1
@@ -1868,13 +1866,13 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: or s5, s6, s5
; RV64I-NEXT: or s2, s3, s2
-; RV64I-NEXT: lbu s3, 4(a1)
+; RV64I-NEXT: or s3, s7, s4
+; RV64I-NEXT: lbu s4, 4(a1)
; RV64I-NEXT: lbu s6, 5(a1)
-; RV64I-NEXT: or s4, s7, s4
; RV64I-NEXT: lbu s7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
; RV64I-NEXT: slli s6, s6, 8
-; RV64I-NEXT: or s3, s6, s3
+; RV64I-NEXT: or s4, s6, s4
; RV64I-NEXT: slli s7, s7, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, s7
@@ -1891,8 +1889,8 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a0, a0, t5
; RV64I-NEXT: or t1, s0, t6
; RV64I-NEXT: or t2, s5, s1
-; RV64I-NEXT: or t3, s4, s2
-; RV64I-NEXT: or a1, a1, s3
+; RV64I-NEXT: or t3, s3, s2
+; RV64I-NEXT: or a1, a1, s4
; RV64I-NEXT: slli a3, a3, 32
; RV64I-NEXT: slli a7, a7, 32
; RV64I-NEXT: slli a0, a0, 32
@@ -1910,12 +1908,12 @@ define void @shl_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: srli a0, a1, 3
; RV64I-NEXT: andi a3, a1, 63
; RV64I-NEXT: andi a0, a0, 24
+; RV64I-NEXT: xori a3, a3, 63
; RV64I-NEXT: sub a0, a6, a0
; RV64I-NEXT: ld a4, 0(a0)
; RV64I-NEXT: ld a5, 8(a0)
; RV64I-NEXT: ld a6, 16(a0)
; RV64I-NEXT: ld a0, 24(a0)
-; RV64I-NEXT: xori a3, a3, 63
; RV64I-NEXT: sll a7, a5, a1
; RV64I-NEXT: srli t0, a4, 1
; RV64I-NEXT: sll t1, a0, a1
@@ -2344,13 +2342,13 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: slli s7, s7, 24
; RV64I-NEXT: or s5, s6, s5
; RV64I-NEXT: or s2, s3, s2
-; RV64I-NEXT: lbu s3, 4(a1)
+; RV64I-NEXT: or s3, s7, s4
+; RV64I-NEXT: lbu s4, 4(a1)
; RV64I-NEXT: lbu s6, 5(a1)
-; RV64I-NEXT: or s4, s7, s4
; RV64I-NEXT: lbu s7, 6(a1)
; RV64I-NEXT: lbu a1, 7(a1)
; RV64I-NEXT: slli s6, s6, 8
-; RV64I-NEXT: or s3, s6, s3
+; RV64I-NEXT: or s4, s6, s4
; RV64I-NEXT: slli s7, s7, 16
; RV64I-NEXT: slli a1, a1, 24
; RV64I-NEXT: or a1, a1, s7
@@ -2363,8 +2361,8 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: or a0, a0, t5
; RV64I-NEXT: or t0, s0, t6
; RV64I-NEXT: or t1, s5, s1
-; RV64I-NEXT: or t2, s4, s2
-; RV64I-NEXT: or a1, a1, s3
+; RV64I-NEXT: or t2, s3, s2
+; RV64I-NEXT: or a1, a1, s4
; RV64I-NEXT: slli a4, a4, 32
; RV64I-NEXT: slli a6, a6, 32
; RV64I-NEXT: slli a0, a0, 32
@@ -2387,11 +2385,11 @@ define void @ashr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; RV64I-NEXT: srli a0, a1, 3
; RV64I-NEXT: andi a3, a1, 63
; RV64I-NEXT: andi a0, a0, 24
+; RV64I-NEXT: xori a3, a3, 63
; RV64I-NEXT: add a0, s6, a0
; RV64I-NEXT: ld a4, 0(a0)
; RV64I-NEXT: ld a5, 8(a0)
; RV64I-NEXT: ld a6, 16(a0)
-; RV64I-NEXT: xori a3, a3, 63
; RV64I-NEXT: ld a0, 24(a0)
; RV64I-NEXT: srl a7, a5, a1
; RV64I-NEXT: slli t0, a6, 1
diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll
index a30593d7d7afb..a496699f7e386 100644
--- a/llvm/test/CodeGen/RISCV/xaluo.ll
+++ b/llvm/test/CodeGen/RISCV/xaluo.ll
@@ -1713,8 +1713,8 @@ define signext i32 @umulo3.i32(i32 signext %0, i32 signext %1, ptr %2) {
; RV64-NEXT: mulhu a0, a0, a1
; RV64-NEXT: srli a1, a0, 32
; RV64-NEXT: snez a1, a1
-; RV64-NEXT: sext.w a0, a0
; RV64-NEXT: sw a1, 0(a2)
+; RV64-NEXT: sext.w a0, a0
; RV64-NEXT: ret
;
; RV32ZBA-LABEL: umulo3.i32:
@@ -1733,8 +1733,8 @@ define signext i32 @umulo3.i32(i32 signext %0, i32 signext %1, ptr %2) {
; RV64ZBA-NEXT: mul a3, a0, a1
; RV64ZBA-NEXT: srli a3, a3, 32
; RV64ZBA-NEXT: snez a3, a3
-; RV64ZBA-NEXT: mulw a0, a0, a1
; RV64ZBA-NEXT: sw a3, 0(a2)
+; RV64ZBA-NEXT: mulw a0, a0, a1
; RV64ZBA-NEXT: ret
;
; RV32ZICOND-LABEL: umulo3.i32:
@@ -1753,8 +1753,8 @@ define signext i32 @umulo3.i32(i32 signext %0, i32 signext %1, ptr %2) {
; RV64ZICOND-NEXT: mulhu a0, a0, a1
; RV64ZICOND-NEXT: srli a1, a0, 32
; RV64ZICOND-NEXT: snez a1, a1
-; RV64ZICOND-NEXT: sext.w a0, a0
; RV64ZICOND-NEXT: sw a1, 0(a2)
+; RV64ZICOND-NEXT: sext.w a0, a0
; RV64ZICOND-NEXT: ret
%4 = tail call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %0, i32 %1)
%5 = extractvalue { i32, i1 } %4, 1
diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
index e761fcb736a87..f6b7f97f6525c 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll
@@ -292,12 +292,12 @@ define ptr @lwuib(ptr %base, i64 %a, ptr %addr.1) {
define ptr @ldia(ptr %base, ptr %addr.2, i64 %a) {
; RV32XTHEADMEMIDX-LABEL: ldia:
; RV32XTHEADMEMIDX: # %bb.0:
-; RV32XTHEADMEMIDX-NEXT: lw a4, 4(a0)
-; RV32XTHEADMEMIDX-NEXT: lw a5, 0(a0)
+; RV32XTHEADMEMIDX-NEXT: lw a4, 0(a0)
+; RV32XTHEADMEMIDX-NEXT: lw a5, 4(a0)
; RV32XTHEADMEMIDX-NEXT: addi a0, a0, -128
-; RV32XTHEADMEMIDX-NEXT: add a3, a4, a3
-; RV32XTHEADMEMIDX-NEXT: add a2, a5, a2
-; RV32XTHEADMEMIDX-NEXT: sltu a4, a2, a5
+; RV32XTHEADMEMIDX-NEXT: add a3, a5, a3
+; RV32XTHEADMEMIDX-NEXT: add a2, a4, a2
+; RV32XTHEADMEMIDX-NEXT: sltu a4, a2, a4
; RV32XTHEADMEMIDX-NEXT: add a3, a3, a4
; RV32XTHEADMEMIDX-NEXT: sw a2, 0(a1)
; RV32XTHEADMEMIDX-NEXT: sw a3, 4(a1)
@@ -859,9 +859,9 @@ define i64 @lrd(ptr %a, i64 %b) {
; RV32XTHEADMEMIDX-LABEL: lrd:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: slli a2, a1, 3
+; RV32XTHEADMEMIDX-NEXT: add a2, a0, a2
+; RV32XTHEADMEMIDX-NEXT: lw a2, 4(a2)
; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: add a0, a0, a2
-; RV32XTHEADMEMIDX-NEXT: lw a2, 4(a0)
; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
@@ -883,8 +883,8 @@ define i64 @lrd_2(ptr %a, i64 %b) {
; RV32XTHEADMEMIDX-LABEL: lrd_2:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: addi a2, a0, 96
-; RV32XTHEADMEMIDX-NEXT: th.lrw a2, a2, a1, 3
; RV32XTHEADMEMIDX-NEXT: addi a0, a0, 100
+; RV32XTHEADMEMIDX-NEXT: th.lrw a2, a2, a1, 3
; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 3
; RV32XTHEADMEMIDX-NEXT: add a0, a2, a2
; RV32XTHEADMEMIDX-NEXT: sltu a2, a0, a2
@@ -909,9 +909,9 @@ define i64 @lurd(ptr %a, i32 %b) {
; RV32XTHEADMEMIDX-LABEL: lurd:
; RV32XTHEADMEMIDX: # %bb.0:
; RV32XTHEADMEMIDX-NEXT: slli a2, a1, 3
+; RV32XTHEADMEMIDX-NEXT: add a2, a0, a2
+; RV32XTHEADMEMIDX-NEXT: lw a2, 4(a2)
; RV32XTHEADMEMIDX-NEXT: th.lrw a1, a0, a1, 3
-; RV32XTHEADMEMIDX-NEXT: add a0, a0, a2
-; RV32XTHEADMEMIDX-NEXT: lw a2, 4(a0)
; RV32XTHEADMEMIDX-NEXT: add a0, a1, a1
; RV32XTHEADMEMIDX-NEXT: sltu a1, a0, a1
; RV32XTHEADMEMIDX-NEXT: add a2, a2, a2
diff --git a/llvm/test/CodeGen/RISCV/xtheadmempair.ll b/llvm/test/CodeGen/RISCV/xtheadmempair.ll
index 3525c40026064..7c940a3966217 100644
--- a/llvm/test/CodeGen/RISCV/xtheadmempair.ll
+++ b/llvm/test/CodeGen/RISCV/xtheadmempair.ll
@@ -57,14 +57,14 @@ define i64 @lwud(ptr %a) {
define i64 @ldd(ptr %a) {
; RV32XTHEADMEMPAIR-LABEL: ldd:
; RV32XTHEADMEMPAIR: # %bb.0:
-; RV32XTHEADMEMPAIR-NEXT: lw a1, 44(a0)
-; RV32XTHEADMEMPAIR-NEXT: lw a2, 32(a0)
-; RV32XTHEADMEMPAIR-NEXT: lw a3, 36(a0)
-; RV32XTHEADMEMPAIR-NEXT: lw a0, 40(a0)
-; RV32XTHEADMEMPAIR-NEXT: add a1, a3, a1
-; RV32XTHEADMEMPAIR-NEXT: add a0, a2, a0
-; RV32XTHEADMEMPAIR-NEXT: sltu a2, a0, a2
-; RV32XTHEADMEMPAIR-NEXT: add a1, a1, a2
+; RV32XTHEADMEMPAIR-NEXT: lw a1, 32(a0)
+; RV32XTHEADMEMPAIR-NEXT: lw a2, 36(a0)
+; RV32XTHEADMEMPAIR-NEXT: lw a3, 40(a0)
+; RV32XTHEADMEMPAIR-NEXT: lw a0, 44(a0)
+; RV32XTHEADMEMPAIR-NEXT: add a2, a2, a0
+; RV32XTHEADMEMPAIR-NEXT: add a0, a1, a3
+; RV32XTHEADMEMPAIR-NEXT: sltu a1, a0, a1
+; RV32XTHEADMEMPAIR-NEXT: add a1, a2, a1
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: ldd:
@@ -245,10 +245,10 @@ define i64 @ld64(ptr %a) {
define i128 @ld128(ptr %a) {
; RV32XTHEADMEMPAIR-LABEL: ld128:
; RV32XTHEADMEMPAIR: # %bb.0:
-; RV32XTHEADMEMPAIR-NEXT: th.lwd a2, a3, (a1), 1, 3
-; RV32XTHEADMEMPAIR-NEXT: th.lwd a4, a5, (a1), 0, 3
-; RV32XTHEADMEMPAIR-NEXT: th.swd a2, a3, (a0), 1, 3
-; RV32XTHEADMEMPAIR-NEXT: th.swd a4, a5, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT: th.lwd a2, a3, (a1), 0, 3
+; RV32XTHEADMEMPAIR-NEXT: th.lwd a4, a5, (a1), 1, 3
+; RV32XTHEADMEMPAIR-NEXT: th.swd a4, a5, (a0), 1, 3
+; RV32XTHEADMEMPAIR-NEXT: th.swd a2, a3, (a0), 0, 3
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: ld128:
@@ -279,10 +279,10 @@ define void @sd64(ptr %a, i64 %b) {
define void @sd128(ptr %a, i128 %b) {
; RV32XTHEADMEMPAIR-LABEL: sd128:
; RV32XTHEADMEMPAIR: # %bb.0:
-; RV32XTHEADMEMPAIR-NEXT: th.lwd a2, a3, (a1), 1, 3
-; RV32XTHEADMEMPAIR-NEXT: th.lwd a4, a5, (a1), 0, 3
-; RV32XTHEADMEMPAIR-NEXT: th.swd a2, a3, (a0), 1, 3
-; RV32XTHEADMEMPAIR-NEXT: th.swd a4, a5, (a0), 0, 3
+; RV32XTHEADMEMPAIR-NEXT: th.lwd a2, a3, (a1), 0, 3
+; RV32XTHEADMEMPAIR-NEXT: th.lwd a4, a5, (a1), 1, 3
+; RV32XTHEADMEMPAIR-NEXT: th.swd a4, a5, (a0), 1, 3
+; RV32XTHEADMEMPAIR-NEXT: th.swd a2, a3, (a0), 0, 3
; RV32XTHEADMEMPAIR-NEXT: ret
;
; RV64XTHEADMEMPAIR-LABEL: sd128:
diff --git a/llvm/test/CodeGen/RISCV/zbb-logic-neg-imm.ll b/llvm/test/CodeGen/RISCV/zbb-logic-neg-imm.ll
index d953d34e2d7b9..1c2eb5ecafbc4 100644
--- a/llvm/test/CodeGen/RISCV/zbb-logic-neg-imm.ll
+++ b/llvm/test/CodeGen/RISCV/zbb-logic-neg-imm.ll
@@ -137,13 +137,13 @@ define void @orarray100(ptr %a) {
; RV32-NEXT: addi a1, a1, 1
; RV32-NEXT: add a4, a0, a4
; RV32-NEXT: lw a5, 0(a4)
-; RV32-NEXT: seqz a6, a1
-; RV32-NEXT: add a2, a2, a6
-; RV32-NEXT: xori a6, a1, 100
; RV32-NEXT: orn a5, a5, a3
-; RV32-NEXT: or a6, a6, a2
; RV32-NEXT: sw a5, 0(a4)
-; RV32-NEXT: bnez a6, .LBB8_1
+; RV32-NEXT: seqz a4, a1
+; RV32-NEXT: xori a5, a1, 100
+; RV32-NEXT: add a2, a2, a4
+; RV32-NEXT: or a5, a5, a2
+; RV32-NEXT: bnez a5, .LBB8_1
; RV32-NEXT: # %bb.2: # %for.cond.cleanup
; RV32-NEXT: ret
;
@@ -180,16 +180,16 @@ for.body:
define void @orarray3(ptr %a) {
; CHECK-LABEL: orarray3:
; CHECK: # %bb.0:
-; CHECK-NEXT: lw a1, 0(a0)
-; CHECK-NEXT: lw a2, 4(a0)
-; CHECK-NEXT: lw a3, 8(a0)
-; CHECK-NEXT: lui a4, 1048560
-; CHECK-NEXT: orn a1, a1, a4
-; CHECK-NEXT: orn a2, a2, a4
-; CHECK-NEXT: orn a3, a3, a4
-; CHECK-NEXT: sw a1, 0(a0)
-; CHECK-NEXT: sw a2, 4(a0)
-; CHECK-NEXT: sw a3, 8(a0)
+; CHECK-NEXT: lui a1, 1048560
+; CHECK-NEXT: lw a2, 0(a0)
+; CHECK-NEXT: lw a3, 4(a0)
+; CHECK-NEXT: lw a4, 8(a0)
+; CHECK-NEXT: orn a2, a2, a1
+; CHECK-NEXT: orn a3, a3, a1
+; CHECK-NEXT: orn a1, a4, a1
+; CHECK-NEXT: sw a2, 0(a0)
+; CHECK-NEXT: sw a3, 4(a0)
+; CHECK-NEXT: sw a1, 8(a0)
; CHECK-NEXT: ret
%1 = load i32, ptr %a, align 4
%or = or i32 %1, 65535
diff --git a/llvm/test/CodeGen/RISCV/zdinx-asm-constraint.ll b/llvm/test/CodeGen/RISCV/zdinx-asm-constraint.ll
index b7d7d4c0945b6..d9f6e1a5820c8 100644
--- a/llvm/test/CodeGen/RISCV/zdinx-asm-constraint.ll
+++ b/llvm/test/CodeGen/RISCV/zdinx-asm-constraint.ll
@@ -10,11 +10,11 @@ define dso_local void @zdinx_asm(ptr nocapture noundef writeonly %a, double noun
; CHECK-LABEL: zdinx_asm:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a5, a4
-; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: mv a4, a3
-; CHECK-NEXT: mv a6, a1
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: #APP
-; CHECK-NEXT: fsgnjx.d a2, a6, a4
+; CHECK-NEXT: fsgnjx.d a2, a2, a4
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: sw a2, 8(a0)
; CHECK-NEXT: sw a3, 12(a0)
@@ -30,11 +30,11 @@ define dso_local void @zdinx_asm_R(ptr nocapture noundef writeonly %a, double no
; CHECK-LABEL: zdinx_asm_R:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mv a5, a4
-; CHECK-NEXT: mv a7, a2
; CHECK-NEXT: mv a4, a3
-; CHECK-NEXT: mv a6, a1
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: #APP
-; CHECK-NEXT: fsgnjx.d a2, a6, a4
+; CHECK-NEXT: fsgnjx.d a2, a2, a4
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: sw a2, 8(a0)
; CHECK-NEXT: sw a3, 12(a0)
@@ -133,21 +133,15 @@ define dso_local void @zdinx_asm_cr(ptr nocapture noundef writeonly %a, double noundef %b, double noundef %c) nounwind {
; CHECK-LABEL: zdinx_asm_cr:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: mv a5, a4
-; CHECK-NEXT: mv s1, a2
; CHECK-NEXT: mv a4, a3
-; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: #APP
-; CHECK-NEXT: fsgnjx.d a2, s0, a4
+; CHECK-NEXT: fsgnjx.d a2, a2, a4
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: sw a2, 8(a0)
; CHECK-NEXT: sw a3, 12(a0)
-; CHECK-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%arrayidx = getelementptr inbounds double, ptr %a, i32 1
@@ -189,21 +183,15 @@ define dso_local void @zdinx_asm_cR(ptr nocapture noundef writeonly %a, double noundef %b, double noundef %c) nounwind {
; CHECK-LABEL: zdinx_asm_cR:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi sp, sp, -16
-; CHECK-NEXT: sw s0, 12(sp) # 4-byte Folded Spill
-; CHECK-NEXT: sw s1, 8(sp) # 4-byte Folded Spill
; CHECK-NEXT: mv a5, a4
-; CHECK-NEXT: mv s1, a2
; CHECK-NEXT: mv a4, a3
-; CHECK-NEXT: mv s0, a1
+; CHECK-NEXT: mv a3, a2
+; CHECK-NEXT: mv a2, a1
; CHECK-NEXT: #APP
-; CHECK-NEXT: fsgnjx.d a2, s0, a4
+; CHECK-NEXT: fsgnjx.d a2, a2, a4
; CHECK-NEXT: #NO_APP
; CHECK-NEXT: sw a2, 8(a0)
; CHECK-NEXT: sw a3, 12(a0)
-; CHECK-NEXT: lw s0, 12(sp) # 4-byte Folded Reload
-; CHECK-NEXT: lw s1, 8(sp) # 4-byte Folded Reload
-; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: ret
entry:
%arrayidx = getelementptr inbounds double, ptr %a, i32 1
diff --git a/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll b/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll
index 9a312d9daca8d..05af53bf8a2b4 100644
--- a/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll
+++ b/llvm/test/CodeGen/RISCV/zdinx-boundary-check.ll
@@ -39,9 +39,9 @@ define void @foo2(ptr nocapture %p, double %d) nounwind {
; RV32ZDINX-LABEL: foo2:
; RV32ZDINX: # %bb.0: # %entry
; RV32ZDINX-NEXT: mv a3, a2
-; RV32ZDINX-NEXT: addi a0, a0, 2047
; RV32ZDINX-NEXT: mv a2, a1
; RV32ZDINX-NEXT: fadd.d a2, a2, a2
+; RV32ZDINX-NEXT: addi a0, a0, 2047
; RV32ZDINX-NEXT: sw a2, -3(a0)
; RV32ZDINX-NEXT: sw a3, 1(a0)
; RV32ZDINX-NEXT: ret
@@ -49,9 +49,9 @@ define void @foo2(ptr nocapture %p, double %d) nounwind {
; RV32ZDINXUALIGNED-LABEL: foo2:
; RV32ZDINXUALIGNED: # %bb.0: # %entry
; RV32ZDINXUALIGNED-NEXT: mv a3, a2
-; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
; RV32ZDINXUALIGNED-NEXT: mv a2, a1
; RV32ZDINXUALIGNED-NEXT: fadd.d a2, a2, a2
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0)
; RV32ZDINXUALIGNED-NEXT: sw a3, 1(a0)
; RV32ZDINXUALIGNED-NEXT: ret
@@ -108,36 +108,36 @@ define void @foo4(ptr %p) nounwind {
; RV32ZDINX-LABEL: foo4:
; RV32ZDINX: # %bb.0: # %entry
; RV32ZDINX-NEXT: addi sp, sp, -16
-; RV32ZDINX-NEXT: addi a1, a0, 2047
-; RV32ZDINX-NEXT: lw a2, -3(a1)
-; RV32ZDINX-NEXT: lw a3, 1(a1)
; RV32ZDINX-NEXT: sw a0, 8(sp)
-; RV32ZDINX-NEXT: lui a0, %hi(d)
-; RV32ZDINX-NEXT: sw a2, %lo(d)(a0)
-; RV32ZDINX-NEXT: sw a3, %lo(d+4)(a0)
+; RV32ZDINX-NEXT: addi a0, a0, 2047
+; RV32ZDINX-NEXT: lw a1, 1(a0)
+; RV32ZDINX-NEXT: lw a0, -3(a0)
+; RV32ZDINX-NEXT: lui a2, %hi(d)
+; RV32ZDINX-NEXT: sw a0, %lo(d)(a2)
+; RV32ZDINX-NEXT: sw a1, %lo(d+4)(a2)
; RV32ZDINX-NEXT: addi sp, sp, 16
; RV32ZDINX-NEXT: ret
;
; RV32ZDINXUALIGNED-LABEL: foo4:
; RV32ZDINXUALIGNED: # %bb.0: # %entry
; RV32ZDINXUALIGNED-NEXT: addi sp, sp, -16
-; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2047
-; RV32ZDINXUALIGNED-NEXT: lw a2, -3(a1)
-; RV32ZDINXUALIGNED-NEXT: lw a3, 1(a1)
; RV32ZDINXUALIGNED-NEXT: sw a0, 8(sp)
-; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(d)
-; RV32ZDINXUALIGNED-NEXT: sw a2, %lo(d)(a0)
-; RV32ZDINXUALIGNED-NEXT: sw a3, %lo(d+4)(a0)
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
+; RV32ZDINXUALIGNED-NEXT: lw a1, 1(a0)
+; RV32ZDINXUALIGNED-NEXT: lw a0, -3(a0)
+; RV32ZDINXUALIGNED-NEXT: lui a2, %hi(d)
+; RV32ZDINXUALIGNED-NEXT: sw a0, %lo(d)(a2)
+; RV32ZDINXUALIGNED-NEXT: sw a1, %lo(d+4)(a2)
; RV32ZDINXUALIGNED-NEXT: addi sp, sp, 16
; RV32ZDINXUALIGNED-NEXT: ret
;
; RV64ZDINX-LABEL: foo4:
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: addi sp, sp, -16
-; RV64ZDINX-NEXT: ld a1, 2044(a0)
; RV64ZDINX-NEXT: sd a0, 8(sp)
-; RV64ZDINX-NEXT: lui a0, %hi(d)
-; RV64ZDINX-NEXT: sd a1, %lo(d)(a0)
+; RV64ZDINX-NEXT: ld a0, 2044(a0)
+; RV64ZDINX-NEXT: lui a1, %hi(d)
+; RV64ZDINX-NEXT: sd a0, %lo(d)(a1)
; RV64ZDINX-NEXT: addi sp, sp, 16
; RV64ZDINX-NEXT: ret
entry:
@@ -184,10 +184,10 @@ define void @foo6(ptr %p, double %d) nounwind {
; RV32ZDINX-LABEL: foo6:
; RV32ZDINX: # %bb.0: # %entry
; RV32ZDINX-NEXT: mv a3, a2
-; RV32ZDINX-NEXT: lui a2, %hi(.LCPI5_0)
-; RV32ZDINX-NEXT: lw a4, %lo(.LCPI5_0)(a2)
-; RV32ZDINX-NEXT: lw a5, %lo(.LCPI5_0+4)(a2)
; RV32ZDINX-NEXT: mv a2, a1
+; RV32ZDINX-NEXT: lui a1, %hi(.LCPI5_0)
+; RV32ZDINX-NEXT: lw a4, %lo(.LCPI5_0)(a1)
+; RV32ZDINX-NEXT: lw a5, %lo(.LCPI5_0+4)(a1)
; RV32ZDINX-NEXT: fadd.d a2, a2, a4
; RV32ZDINX-NEXT: addi a0, a0, 2047
; RV32ZDINX-NEXT: sw a2, -3(a0)
@@ -197,10 +197,10 @@ define void @foo6(ptr %p, double %d) nounwind {
; RV32ZDINXUALIGNED-LABEL: foo6:
; RV32ZDINXUALIGNED: # %bb.0: # %entry
; RV32ZDINXUALIGNED-NEXT: mv a3, a2
-; RV32ZDINXUALIGNED-NEXT: lui a2, %hi(.LCPI5_0)
-; RV32ZDINXUALIGNED-NEXT: lw a4, %lo(.LCPI5_0)(a2)
-; RV32ZDINXUALIGNED-NEXT: lw a5, %lo(.LCPI5_0+4)(a2)
; RV32ZDINXUALIGNED-NEXT: mv a2, a1
+; RV32ZDINXUALIGNED-NEXT: lui a1, %hi(.LCPI5_0)
+; RV32ZDINXUALIGNED-NEXT: lw a4, %lo(.LCPI5_0)(a1)
+; RV32ZDINXUALIGNED-NEXT: lw a5, %lo(.LCPI5_0+4)(a1)
; RV32ZDINXUALIGNED-NEXT: fadd.d a2, a2, a4
; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
; RV32ZDINXUALIGNED-NEXT: sw a2, -3(a0)
@@ -226,10 +226,10 @@ define void @foo7(ptr nocapture %p) nounwind {
; RV32ZDINX: # %bb.0: # %entry
; RV32ZDINX-NEXT: addi sp, sp, -16
; RV32ZDINX-NEXT: lui a1, %hi(d)
-; RV32ZDINX-NEXT: lw a2, %lo(d+4)(a1)
-; RV32ZDINX-NEXT: addi a1, a1, %lo(d)
-; RV32ZDINX-NEXT: sw a2, 8(sp)
-; RV32ZDINX-NEXT: lw a1, 8(a1)
+; RV32ZDINX-NEXT: addi a2, a1, %lo(d)
+; RV32ZDINX-NEXT: lw a1, %lo(d+4)(a1)
+; RV32ZDINX-NEXT: sw a1, 8(sp)
+; RV32ZDINX-NEXT: lw a1, 8(a2)
; RV32ZDINX-NEXT: sw a1, 12(sp)
; RV32ZDINX-NEXT: lw a2, 8(sp)
; RV32ZDINX-NEXT: lw a3, 12(sp)
@@ -254,8 +254,8 @@ define void @foo7(ptr nocapture %p) nounwind {
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: lui a1, %hi(d)
; RV64ZDINX-NEXT: addi a2, a1, %lo(d)
-; RV64ZDINX-NEXT: lwu a2, 8(a2)
; RV64ZDINX-NEXT: lwu a1, %lo(d+4)(a1)
+; RV64ZDINX-NEXT: lwu a2, 8(a2)
; RV64ZDINX-NEXT: slli a2, a2, 32
; RV64ZDINX-NEXT: or a1, a2, a1
; RV64ZDINX-NEXT: sd a1, 2044(a0)
@@ -272,45 +272,45 @@ define void @foo8(ptr %p) nounwind {
; RV32ZDINX-LABEL: foo8:
; RV32ZDINX: # %bb.0: # %entry
; RV32ZDINX-NEXT: addi sp, sp, -16
-; RV32ZDINX-NEXT: addi a1, a0, 2047
-; RV32ZDINX-NEXT: lw a2, -3(a1)
-; RV32ZDINX-NEXT: lw a3, 1(a1)
; RV32ZDINX-NEXT: sw a0, 8(sp)
-; RV32ZDINX-NEXT: sw a2, 0(sp)
-; RV32ZDINX-NEXT: sw a3, 4(sp)
+; RV32ZDINX-NEXT: addi a0, a0, 2047
+; RV32ZDINX-NEXT: lw a1, 1(a0)
+; RV32ZDINX-NEXT: lw a0, -3(a0)
+; RV32ZDINX-NEXT: lui a2, %hi(d)
+; RV32ZDINX-NEXT: addi a3, a2, %lo(d)
+; RV32ZDINX-NEXT: sw a0, 0(sp)
+; RV32ZDINX-NEXT: sw a1, 4(sp)
; RV32ZDINX-NEXT: lw a0, 4(sp)
-; RV32ZDINX-NEXT: lui a1, %hi(d)
-; RV32ZDINX-NEXT: addi a2, a1, %lo(d)
-; RV32ZDINX-NEXT: sw a0, 8(a2)
+; RV32ZDINX-NEXT: sw a0, 8(a3)
; RV32ZDINX-NEXT: lw a0, 0(sp)
-; RV32ZDINX-NEXT: sw a0, %lo(d+4)(a1)
+; RV32ZDINX-NEXT: sw a0, %lo(d+4)(a2)
; RV32ZDINX-NEXT: addi sp, sp, 16
; RV32ZDINX-NEXT: ret
;
; RV32ZDINXUALIGNED-LABEL: foo8:
; RV32ZDINXUALIGNED: # %bb.0: # %entry
; RV32ZDINXUALIGNED-NEXT: addi sp, sp, -16
-; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2047
-; RV32ZDINXUALIGNED-NEXT: lw a2, -3(a1)
-; RV32ZDINXUALIGNED-NEXT: lw a3, 1(a1)
; RV32ZDINXUALIGNED-NEXT: sw a0, 8(sp)
-; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(d)
-; RV32ZDINXUALIGNED-NEXT: addi a0, a0, %lo(d)
-; RV32ZDINXUALIGNED-NEXT: sw a2, 4(a0)
-; RV32ZDINXUALIGNED-NEXT: sw a3, 8(a0)
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
+; RV32ZDINXUALIGNED-NEXT: lw a1, 1(a0)
+; RV32ZDINXUALIGNED-NEXT: lw a0, -3(a0)
+; RV32ZDINXUALIGNED-NEXT: lui a2, %hi(d)
+; RV32ZDINXUALIGNED-NEXT: addi a2, a2, %lo(d)
+; RV32ZDINXUALIGNED-NEXT: sw a0, 4(a2)
+; RV32ZDINXUALIGNED-NEXT: sw a1, 8(a2)
; RV32ZDINXUALIGNED-NEXT: addi sp, sp, 16
; RV32ZDINXUALIGNED-NEXT: ret
;
; RV64ZDINX-LABEL: foo8:
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: addi sp, sp, -16
-; RV64ZDINX-NEXT: ld a1, 2044(a0)
; RV64ZDINX-NEXT: sd a0, 8(sp)
-; RV64ZDINX-NEXT: lui a0, %hi(d)
-; RV64ZDINX-NEXT: addi a2, a0, %lo(d)
-; RV64ZDINX-NEXT: sw a1, %lo(d+4)(a0)
-; RV64ZDINX-NEXT: srli a1, a1, 32
-; RV64ZDINX-NEXT: sw a1, 8(a2)
+; RV64ZDINX-NEXT: ld a0, 2044(a0)
+; RV64ZDINX-NEXT: lui a1, %hi(d)
+; RV64ZDINX-NEXT: addi a2, a1, %lo(d)
+; RV64ZDINX-NEXT: sw a0, %lo(d+4)(a1)
+; RV64ZDINX-NEXT: srli a0, a0, 32
+; RV64ZDINX-NEXT: sw a0, 8(a2)
; RV64ZDINX-NEXT: addi sp, sp, 16
; RV64ZDINX-NEXT: ret
entry:
@@ -358,11 +358,11 @@ define void @foo9(ptr nocapture %p) nounwind {
; RV64ZDINX-LABEL: foo9:
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: lui a1, %hi(e)
-; RV64ZDINX-NEXT: addi a2, a1, %lo(e)
-; RV64ZDINX-NEXT: lwu a2, 4(a2)
-; RV64ZDINX-NEXT: lwu a1, %lo(e)(a1)
-; RV64ZDINX-NEXT: slli a2, a2, 32
-; RV64ZDINX-NEXT: or a1, a2, a1
+; RV64ZDINX-NEXT: lwu a2, %lo(e)(a1)
+; RV64ZDINX-NEXT: addi a1, a1, %lo(e)
+; RV64ZDINX-NEXT: lwu a1, 4(a1)
+; RV64ZDINX-NEXT: slli a1, a1, 32
+; RV64ZDINX-NEXT: or a1, a1, a2
; RV64ZDINX-NEXT: sd a1, 2044(a0)
; RV64ZDINX-NEXT: ret
entry:
@@ -380,41 +380,41 @@ define void @foo10(ptr %p) nounwind {
; RV32ZDINX-NEXT: lw a2, -3(a1)
; RV32ZDINX-NEXT: lw a3, 1(a1)
; RV32ZDINX-NEXT: sw a0, 8(sp)
+; RV32ZDINX-NEXT: lui a0, %hi(e)
; RV32ZDINX-NEXT: sw a2, 0(sp)
; RV32ZDINX-NEXT: sw a3, 4(sp)
-; RV32ZDINX-NEXT: lw a0, 4(sp)
-; RV32ZDINX-NEXT: lui a1, %hi(e)
-; RV32ZDINX-NEXT: addi a2, a1, %lo(e)
-; RV32ZDINX-NEXT: sw a0, 4(a2)
-; RV32ZDINX-NEXT: lw a0, 0(sp)
-; RV32ZDINX-NEXT: sw a0, %lo(e)(a1)
+; RV32ZDINX-NEXT: addi a1, a0, %lo(e)
+; RV32ZDINX-NEXT: lw a2, 4(sp)
+; RV32ZDINX-NEXT: sw a2, 4(a1)
+; RV32ZDINX-NEXT: lw a1, 0(sp)
+; RV32ZDINX-NEXT: sw a1, %lo(e)(a0)
; RV32ZDINX-NEXT: addi sp, sp, 16
; RV32ZDINX-NEXT: ret
;
; RV32ZDINXUALIGNED-LABEL: foo10:
; RV32ZDINXUALIGNED: # %bb.0: # %entry
; RV32ZDINXUALIGNED-NEXT: addi sp, sp, -16
-; RV32ZDINXUALIGNED-NEXT: addi a1, a0, 2047
-; RV32ZDINXUALIGNED-NEXT: lw a2, -3(a1)
-; RV32ZDINXUALIGNED-NEXT: lw a3, 1(a1)
; RV32ZDINXUALIGNED-NEXT: sw a0, 8(sp)
-; RV32ZDINXUALIGNED-NEXT: lui a0, %hi(e)
-; RV32ZDINXUALIGNED-NEXT: addi a0, a0, %lo(e)
-; RV32ZDINXUALIGNED-NEXT: sw a2, 0(a0)
-; RV32ZDINXUALIGNED-NEXT: sw a3, 4(a0)
+; RV32ZDINXUALIGNED-NEXT: addi a0, a0, 2047
+; RV32ZDINXUALIGNED-NEXT: lw a1, 1(a0)
+; RV32ZDINXUALIGNED-NEXT: lw a0, -3(a0)
+; RV32ZDINXUALIGNED-NEXT: lui a2, %hi(e)
+; RV32ZDINXUALIGNED-NEXT: addi a2, a2, %lo(e)
+; RV32ZDINXUALIGNED-NEXT: sw a0, 0(a2)
+; RV32ZDINXUALIGNED-NEXT: sw a1, 4(a2)
; RV32ZDINXUALIGNED-NEXT: addi sp, sp, 16
; RV32ZDINXUALIGNED-NEXT: ret
;
; RV64ZDINX-LABEL: foo10:
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: addi sp, sp, -16
-; RV64ZDINX-NEXT: ld a1, 2044(a0)
; RV64ZDINX-NEXT: sd a0, 8(sp)
-; RV64ZDINX-NEXT: lui a0, %hi(e)
-; RV64ZDINX-NEXT: sw a1, %lo(e)(a0)
-; RV64ZDINX-NEXT: addi a0, a0, %lo(e)
-; RV64ZDINX-NEXT: srli a1, a1, 32
-; RV64ZDINX-NEXT: sw a1, 4(a0)
+; RV64ZDINX-NEXT: ld a0, 2044(a0)
+; RV64ZDINX-NEXT: lui a1, %hi(e)
+; RV64ZDINX-NEXT: sw a0, %lo(e)(a1)
+; RV64ZDINX-NEXT: addi a1, a1, %lo(e)
+; RV64ZDINX-NEXT: srli a0, a0, 32
+; RV64ZDINX-NEXT: sw a0, 4(a1)
; RV64ZDINX-NEXT: addi sp, sp, 16
; RV64ZDINX-NEXT: ret
entry:
@@ -521,10 +521,10 @@ define double @foo13(ptr nocapture %p) nounwind {
; RV64ZDINX-LABEL: foo13:
; RV64ZDINX: # %bb.0: # %entry
; RV64ZDINX-NEXT: lui a0, %hi(f)
-; RV64ZDINX-NEXT: lwu a1, %lo(f+8)(a0)
-; RV64ZDINX-NEXT: lwu a0, %lo(f+4)(a0)
-; RV64ZDINX-NEXT: slli a1, a1, 32
-; RV64ZDINX-NEXT: or a0, a1, a0
+; RV64ZDINX-NEXT: lwu a1, %lo(f+4)(a0)
+; RV64ZDINX-NEXT: lwu a0, %lo(f+8)(a0)
+; RV64ZDINX-NEXT: slli a0, a0, 32
+; RV64ZDINX-NEXT: or a0, a0, a1
; RV64ZDINX-NEXT: ret
entry:
%add.ptr = getelementptr inbounds i8, ptr @f, i64 4