From f74a607fb7a6d3a632cf40a0abcac3078f9cc73b Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Tue, 19 Aug 2025 17:24:30 -0700 Subject: [PATCH 1/9] Pre-commit tests --- .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 111 +++++++++ .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 211 ++++++++++++++++++ 2 files changed, 322 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index 3c3e08d387faa..d4d4ed682f6cf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1828,3 +1828,114 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d %v7 = insertelement <8 x double> %v6, double %e7, i64 7 ret <8 x double> %v7 } + +define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6) vscale_range(4, 128) { +; CHECK-LABEL: buildvec_slideup: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 +; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: ret + %v0 = insertelement <8 x double> poison, double %e0, i64 0 + %v1 = insertelement <8 x double> %v0, double %e1, i64 1 + %v2 = insertelement <8 x double> %v1, double %e2, i64 2 + %v3 = insertelement <8 x double> %v2, double %e3, i64 3 + %v4 = insertelement <8 x double> %v3, double %e4, i64 4 + %v5 = insertelement <8 x double> %v4, double %e5, i64 5 + %v6 = insertelement <8 x double> %v5, double %e6, i64 6 + %e7 = extractelement <4 x double> %v, i64 0 + %v7 = insertelement <8 x double> %v6, double %e7, i64 7 + ret <8 x double> %v7 +} + +; Negative test for slideup lowering where the extract_element was not build_vector's last operand. 
+define <8 x double> @buildvec_slideup_not_last_element(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e7) vscale_range(4, 128) { +; CHECK-LABEL: buildvec_slideup_not_last_element: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma +; CHECK-NEXT: vfmv.f.s ft0, v8 +; CHECK-NEXT: vfmv.v.f v8, fa0 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 +; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 +; CHECK-NEXT: ret + %v0 = insertelement <8 x double> poison, double %e0, i64 0 + %v1 = insertelement <8 x double> %v0, double %e1, i64 1 + %v2 = insertelement <8 x double> %v1, double %e2, i64 2 + %v3 = insertelement <8 x double> %v2, double %e3, i64 3 + %v4 = insertelement <8 x double> %v3, double %e4, i64 4 + %v5 = insertelement <8 x double> %v4, double %e5, i64 5 + %e6 = extractelement <4 x double> %v, i64 0 + %v6 = insertelement <8 x double> %v5, double %e6, i64 6 + %v7 = insertelement <8 x double> %v6, double %e7, i64 7 + ret <8 x double> %v7 +} + +define <4 x float> @buildvec_vfredusum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind { +; CHECK-LABEL: buildvec_vfredusum: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v16 +; CHECK-NEXT: vfredusum.vs v9, v10, v16 +; CHECK-NEXT: vfredusum.vs v10, v12, v16 +; CHECK-NEXT: vfredusum.vs v11, v14, v16 +; CHECK-NEXT: vfmv.f.s fa5, v9 +; CHECK-NEXT: vfmv.f.s fa4, v10 +; CHECK-NEXT: vfmv.f.s fa3, v11 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vrgather.vi v9, v8, 0 +; CHECK-NEXT: vfslide1down.vf v8, v9, fa5 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 +; CHECK-NEXT: ret + %247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1) + %248 = insertelement <4 x float> poison, float %247, i64 0 + %250 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2) + %251 = insertelement <4 x float> %248, float %250, i64 1 + %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3) + %253 = insertelement <4 x float> %251, float %252, i64 2 + %254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4) + %255 = insertelement <4 x float> %253, float %254, i64 3 + ret <4 x float> %255 +} + +define <4 x float> @buildvec_vfredosum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind { +; CHECK-LABEL: buildvec_vfredosum: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vfredosum.vs v8, v8, v16 +; CHECK-NEXT: vfredosum.vs v9, v10, v16 +; CHECK-NEXT: vfredosum.vs v10, v12, v16 +; CHECK-NEXT: vfredosum.vs v11, v14, v16 +; CHECK-NEXT: vfmv.f.s fa5, v9 +; CHECK-NEXT: vfmv.f.s fa4, v10 +; CHECK-NEXT: vfmv.f.s fa3, v11 +; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-NEXT: vrgather.vi v9, v8, 0 +; CHECK-NEXT: vfslide1down.vf v8, v9, fa5 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 +; CHECK-NEXT: ret + %247 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1) + %248 = insertelement <4 x float> 
poison, float %247, i64 0 + %250 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2) + %251 = insertelement <4 x float> %248, float %250, i64 1 + %252 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3) + %253 = insertelement <4 x float> %251, float %252, i64 2 + %254 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4) + %255 = insertelement <4 x float> %253, float %254, i64 3 + ret <4 x float> %255 +} diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index d9bb007a10f71..d9122676e805f 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -3416,5 +3416,216 @@ define <4 x i1> @buildvec_i1_splat(i1 %e1) { ret <4 x i1> %v4 } +define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind { +; RV32-LABEL: buildvec_vredsum: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vmv.s.x v16, zero +; RV32-NEXT: vredsum.vs v8, v8, v16 +; RV32-NEXT: vredsum.vs v9, v10, v16 +; RV32-NEXT: vredsum.vs v10, v12, v16 +; RV32-NEXT: vredsum.vs v11, v14, v16 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a2, v10 +; RV32-NEXT: vmv.x.s a3, v11 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: vslide1down.vx v8, v8, a3 +; RV32-NEXT: ret +; +; RV64V-ONLY-LABEL: buildvec_vredsum: +; RV64V-ONLY: # %bb.0: +; RV64V-ONLY-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-ONLY-NEXT: vmv.s.x v16, zero +; RV64V-ONLY-NEXT: vredsum.vs v8, v8, v16 +; RV64V-ONLY-NEXT: vredsum.vs v9, v10, v16 +; RV64V-ONLY-NEXT: vredsum.vs v10, v12, v16 +; RV64V-ONLY-NEXT: vredsum.vs v11, v14, v16 +; RV64V-ONLY-NEXT: vmv.x.s a0, v8 +; RV64V-ONLY-NEXT: vmv.x.s a1, v9 +; RV64V-ONLY-NEXT: vmv.x.s a2, v10 +; RV64V-ONLY-NEXT: vmv.x.s a3, v11 +; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64V-ONLY-NEXT: vmv.v.x v8, a0 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV64V-ONLY-NEXT: ret +; +; RVA22U64-LABEL: buildvec_vredsum: +; RVA22U64: # %bb.0: +; RVA22U64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVA22U64-NEXT: vmv.s.x v16, zero +; RVA22U64-NEXT: vredsum.vs v8, v8, v16 +; RVA22U64-NEXT: vredsum.vs v9, v10, v16 +; RVA22U64-NEXT: vredsum.vs v10, v12, v16 +; RVA22U64-NEXT: vredsum.vs v11, v14, v16 +; RVA22U64-NEXT: vmv.x.s a0, v8 +; RVA22U64-NEXT: vmv.x.s a1, v9 +; RVA22U64-NEXT: vmv.x.s a2, v10 +; RVA22U64-NEXT: slli a1, a1, 32 +; RVA22U64-NEXT: add.uw a0, a0, a1 +; RVA22U64-NEXT: vmv.x.s a1, v11 +; RVA22U64-NEXT: slli a1, a1, 32 +; RVA22U64-NEXT: add.uw a1, a2, a1 +; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVA22U64-NEXT: vmv.v.x v8, a0 +; RVA22U64-NEXT: vslide1down.vx v8, v8, a1 +; RVA22U64-NEXT: ret +; +; RVA22U64-PACK-LABEL: buildvec_vredsum: +; RVA22U64-PACK: # %bb.0: +; RVA22U64-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVA22U64-PACK-NEXT: vmv.s.x v16, zero +; RVA22U64-PACK-NEXT: vredsum.vs v8, v8, v16 +; RVA22U64-PACK-NEXT: vredsum.vs v9, v10, v16 +; RVA22U64-PACK-NEXT: vredsum.vs v10, v12, v16 +; RVA22U64-PACK-NEXT: vredsum.vs v11, v14, v16 +; RVA22U64-PACK-NEXT: vmv.x.s a0, v8 +; RVA22U64-PACK-NEXT: vmv.x.s a1, v9 +; RVA22U64-PACK-NEXT: vmv.x.s 
a2, v10 +; RVA22U64-PACK-NEXT: pack a0, a0, a1 +; RVA22U64-PACK-NEXT: vmv.x.s a1, v11 +; RVA22U64-PACK-NEXT: pack a1, a2, a1 +; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVA22U64-PACK-NEXT: vmv.v.x v8, a0 +; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a1 +; RVA22U64-PACK-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_vredsum: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32-NEXT: vmv.s.x v16, zero +; RV64ZVE32-NEXT: vredsum.vs v8, v8, v16 +; RV64ZVE32-NEXT: vredsum.vs v9, v10, v16 +; RV64ZVE32-NEXT: vredsum.vs v10, v12, v16 +; RV64ZVE32-NEXT: vredsum.vs v11, v14, v16 +; RV64ZVE32-NEXT: vmv.x.s a0, v8 +; RV64ZVE32-NEXT: vmv.x.s a1, v9 +; RV64ZVE32-NEXT: vmv.x.s a2, v10 +; RV64ZVE32-NEXT: vmv.x.s a3, v11 +; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64ZVE32-NEXT: vmv.v.x v8, a0 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32-NEXT: ret + %247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0) + %248 = insertelement <4 x i32> poison, i32 %247, i64 0 + %250 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg1) + %251 = insertelement <4 x i32> %248, i32 %250, i64 1 + %252 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg2) + %253 = insertelement <4 x i32> %251, i32 %252, i64 2 + %254 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg3) + %255 = insertelement <4 x i32> %253, i32 %254, i64 3 + ret <4 x i32> %255 +} + +define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind { +; RV32-LABEL: buildvec_vredmax: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV32-NEXT: vredmaxu.vs v8, v8, v8 +; RV32-NEXT: vredmaxu.vs v9, v10, v10 +; RV32-NEXT: vredmaxu.vs v10, v12, v12 +; RV32-NEXT: vredmaxu.vs v11, v14, v14 +; RV32-NEXT: vmv.x.s a0, v8 +; RV32-NEXT: vmv.x.s a1, v9 +; RV32-NEXT: vmv.x.s a2, v10 +; RV32-NEXT: vmv.x.s a3, v11 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.v.x v8, a0 +; RV32-NEXT: vslide1down.vx v8, v8, a1 +; RV32-NEXT: vslide1down.vx v8, v8, a2 +; RV32-NEXT: vslide1down.vx v8, v8, a3 +; RV32-NEXT: ret +; +; RV64V-ONLY-LABEL: buildvec_vredmax: +; RV64V-ONLY: # %bb.0: +; RV64V-ONLY-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64V-ONLY-NEXT: vredmaxu.vs v8, v8, v8 +; RV64V-ONLY-NEXT: vredmaxu.vs v9, v10, v10 +; RV64V-ONLY-NEXT: vredmaxu.vs v10, v12, v12 +; RV64V-ONLY-NEXT: vredmaxu.vs v11, v14, v14 +; RV64V-ONLY-NEXT: vmv.x.s a0, v8 +; RV64V-ONLY-NEXT: vmv.x.s a1, v9 +; RV64V-ONLY-NEXT: vmv.x.s a2, v10 +; RV64V-ONLY-NEXT: vmv.x.s a3, v11 +; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64V-ONLY-NEXT: vmv.v.x v8, a0 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 +; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV64V-ONLY-NEXT: ret +; +; RVA22U64-LABEL: buildvec_vredmax: +; RVA22U64: # %bb.0: +; RVA22U64-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVA22U64-NEXT: vredmaxu.vs v8, v8, v8 +; RVA22U64-NEXT: vredmaxu.vs v9, v10, v10 +; RVA22U64-NEXT: vredmaxu.vs v10, v12, v12 +; RVA22U64-NEXT: vredmaxu.vs v11, v14, v14 +; RVA22U64-NEXT: vmv.x.s a0, v8 +; RVA22U64-NEXT: vmv.x.s a1, v9 +; RVA22U64-NEXT: vmv.x.s a2, v10 +; RVA22U64-NEXT: slli a1, a1, 32 +; RVA22U64-NEXT: add.uw a0, a0, a1 +; RVA22U64-NEXT: vmv.x.s a1, v11 +; RVA22U64-NEXT: slli a1, a1, 32 +; RVA22U64-NEXT: add.uw a1, a2, a1 +; RVA22U64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; 
RVA22U64-NEXT: vmv.v.x v8, a0 +; RVA22U64-NEXT: vslide1down.vx v8, v8, a1 +; RVA22U64-NEXT: ret +; +; RVA22U64-PACK-LABEL: buildvec_vredmax: +; RVA22U64-PACK: # %bb.0: +; RVA22U64-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RVA22U64-PACK-NEXT: vredmaxu.vs v8, v8, v8 +; RVA22U64-PACK-NEXT: vredmaxu.vs v9, v10, v10 +; RVA22U64-PACK-NEXT: vredmaxu.vs v10, v12, v12 +; RVA22U64-PACK-NEXT: vredmaxu.vs v11, v14, v14 +; RVA22U64-PACK-NEXT: vmv.x.s a0, v8 +; RVA22U64-PACK-NEXT: vmv.x.s a1, v9 +; RVA22U64-PACK-NEXT: vmv.x.s a2, v10 +; RVA22U64-PACK-NEXT: pack a0, a0, a1 +; RVA22U64-PACK-NEXT: vmv.x.s a1, v11 +; RVA22U64-PACK-NEXT: pack a1, a2, a1 +; RVA22U64-PACK-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RVA22U64-PACK-NEXT: vmv.v.x v8, a0 +; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a1 +; RVA22U64-PACK-NEXT: ret +; +; RV64ZVE32-LABEL: buildvec_vredmax: +; RV64ZVE32: # %bb.0: +; RV64ZVE32-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; RV64ZVE32-NEXT: vredmaxu.vs v8, v8, v8 +; RV64ZVE32-NEXT: vredmaxu.vs v9, v10, v10 +; RV64ZVE32-NEXT: vredmaxu.vs v10, v12, v12 +; RV64ZVE32-NEXT: vredmaxu.vs v11, v14, v14 +; RV64ZVE32-NEXT: vmv.x.s a0, v8 +; RV64ZVE32-NEXT: vmv.x.s a1, v9 +; RV64ZVE32-NEXT: vmv.x.s a2, v10 +; RV64ZVE32-NEXT: vmv.x.s a3, v11 +; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64ZVE32-NEXT: vmv.v.x v8, a0 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 +; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32-NEXT: ret + %247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0) + %248 = insertelement <4 x i32> poison, i32 %247, i64 0 + %250 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg1) + %251 = insertelement <4 x i32> %248, i32 %250, i64 1 + %252 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg2) + %253 = insertelement <4 x i32> %251, i32 %252, i64 2 + %254 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg3) + %255 = insertelement <4 x i32> %253, i32 %254, i64 3 + ret <4 x i32> %255 +} + ;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: ; RV64: {{.*}} From c5b56c25754c5fd684ca69aa63cf603b7b235339 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Tue, 19 Aug 2025 13:34:36 -0700 Subject: [PATCH 2/9] [RISCV] Use slideup when the last build_vector operand is a reduction --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 85 ++++++++++++++++--- .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 30 +++---- .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 60 ++++++------- .../CodeGen/RISCV/rvv/redundant-vfmvsf.ll | 7 +- 4 files changed, 114 insertions(+), 68 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4a1db80076530..ce6fc8425856a 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4512,33 +4512,94 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, "Illegal type which will result in reserved encoding"); const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC; + auto getVSlide = [&](bool SlideUp, EVT ContainerVT, SDValue Passthru, + SDValue Vec, SDValue Offset, SDValue Mask, + SDValue VL) -> SDValue { + if (SlideUp) + return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset, + Mask, VL, Policy); + return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset, + Mask, VL, Policy); + }; + + // General case: splat the first operand and sliding other operands down one + // by one to form a vector. Alternatively, if the last operand is an + // extraction from a reduction result, we can use the original vector + // reduction result as the start value and slide up instead of slide down. + // Such that we can avoid the splat. + SmallVector Operands(Op->op_begin(), Op->op_end()); + SDValue Reduce; + bool SlideUp = false; + // Find the first first non-undef from the tail. + auto ItLastNonUndef = find_if(Operands.rbegin(), Operands.rend(), + [](SDValue V) { return !V.isUndef(); }); + if (ItLastNonUndef != Operands.rend()) { + using namespace SDPatternMatch; + // Check if the last non-undef operand was extracted from a reduction. + for (unsigned Opc : + {RISCVISD::VECREDUCE_ADD_VL, RISCVISD::VECREDUCE_UMAX_VL, + RISCVISD::VECREDUCE_SMAX_VL, RISCVISD::VECREDUCE_UMIN_VL, + RISCVISD::VECREDUCE_SMIN_VL, RISCVISD::VECREDUCE_AND_VL, + RISCVISD::VECREDUCE_OR_VL, RISCVISD::VECREDUCE_XOR_VL, + RISCVISD::VECREDUCE_FADD_VL, RISCVISD::VECREDUCE_SEQ_FADD_VL, + RISCVISD::VECREDUCE_FMAX_VL, RISCVISD::VECREDUCE_FMIN_VL}) { + SlideUp = sd_match( + *ItLastNonUndef, + m_ExtractElt(m_AllOf(m_Opc(Opc), m_Value(Reduce)), m_Zero())); + if (SlideUp) + break; + } + } + + if (SlideUp) { + // Adapt Reduce's type into ContainerVT. + if (Reduce.getValueType().getVectorMinNumElements() < + ContainerVT.getVectorMinNumElements()) + Reduce = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Reduce, 0); + else + Reduce = DAG.getExtractSubvector(DL, ContainerVT, Reduce, 0); + + // Reverse the elements as we're going to slide up from the last element. + for (unsigned i = 0U, N = Operands.size(), H = divideCeil(N, 2); i < H; ++i) + std::swap(Operands[i], Operands[N - 1 - i]); + } SDValue Vec; UndefCount = 0; - for (SDValue V : Op->ops()) { + for (SDValue V : Operands) { if (V.isUndef()) { UndefCount++; continue; } - // Start our sequence with a TA splat in the hopes that hardware is able to - // recognize there's no dependency on the prior value of our temporary - // register. 
+ // Start our sequence with either a TA splat or a reduction result in the + // hopes that hardware is able to recognize there's no dependency on the + // prior value of our temporary register. if (!Vec) { - Vec = DAG.getSplatVector(VT, DL, V); - Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); + if (SlideUp) { + Vec = Reduce; + } else { + Vec = DAG.getSplatVector(VT, DL, V); + Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); + } + UndefCount = 0; continue; } if (UndefCount) { const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); - Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), - Vec, Offset, Mask, VL, Policy); + Vec = getVSlide(SlideUp, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, + Offset, Mask, VL); UndefCount = 0; } - auto OpCode = - VT.isFloatingPoint() ? RISCVISD::VFSLIDE1DOWN_VL : RISCVISD::VSLIDE1DOWN_VL; + + unsigned OpCode; + if (VT.isFloatingPoint()) + OpCode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL; + else + OpCode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL; + if (!VT.isFloatingPoint()) V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V); Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, @@ -4546,8 +4607,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, } if (UndefCount) { const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); - Vec = getVSlidedown(DAG, Subtarget, DL, ContainerVT, DAG.getUNDEF(ContainerVT), - Vec, Offset, Mask, VL, Policy); + Vec = getVSlide(SlideUp, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, + Offset, Mask, VL); } return convertFromScalableVector(VT, Vec, DAG, Subtarget); } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index d4d4ed682f6cf..972e565ba8657 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1890,15 +1890,14 @@ define <4 x float> @buildvec_vfredusum(float %start, <8 x float> %arg1, <8 x flo ; CHECK-NEXT: vfredusum.vs v8, v8, v16 ; CHECK-NEXT: vfredusum.vs v9, v10, v16 ; CHECK-NEXT: vfredusum.vs v10, v12, v16 -; CHECK-NEXT: vfredusum.vs v11, v14, v16 -; CHECK-NEXT: vfmv.f.s fa5, v9 -; CHECK-NEXT: vfmv.f.s fa4, v10 -; CHECK-NEXT: vfmv.f.s fa3, v11 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: vfmv.f.s fa4, v9 +; CHECK-NEXT: vfmv.f.s fa3, v10 +; CHECK-NEXT: vfredusum.vs v8, v14, v16 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vrgather.vi v9, v8, 0 -; CHECK-NEXT: vfslide1down.vf v8, v9, fa5 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 +; CHECK-NEXT: vfslide1up.vf v9, v8, fa3 +; CHECK-NEXT: vfslide1up.vf v10, v9, fa4 +; CHECK-NEXT: vfslide1up.vf v8, v10, fa5 ; CHECK-NEXT: ret %247 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1) %248 = insertelement <4 x float> poison, float %247, i64 0 @@ -1919,15 +1918,14 @@ define <4 x float> @buildvec_vfredosum(float %start, <8 x float> %arg1, <8 x flo ; CHECK-NEXT: vfredosum.vs v8, v8, v16 ; CHECK-NEXT: vfredosum.vs v9, v10, v16 ; CHECK-NEXT: vfredosum.vs v10, v12, v16 -; CHECK-NEXT: vfredosum.vs v11, v14, v16 -; CHECK-NEXT: vfmv.f.s fa5, v9 -; CHECK-NEXT: vfmv.f.s fa4, v10 -; CHECK-NEXT: vfmv.f.s fa3, v11 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: vfmv.f.s fa4, v9 +; CHECK-NEXT: vfmv.f.s fa3, v10 +; CHECK-NEXT: vfredosum.vs v8, v14, 
v16 ; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vrgather.vi v9, v8, 0 -; CHECK-NEXT: vfslide1down.vf v8, v9, fa5 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 +; CHECK-NEXT: vfslide1up.vf v9, v8, fa3 +; CHECK-NEXT: vfslide1up.vf v10, v9, fa4 +; CHECK-NEXT: vfslide1up.vf v8, v10, fa5 ; CHECK-NEXT: ret %247 = tail call float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1) %248 = insertelement <4 x float> poison, float %247, i64 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index d9122676e805f..a02117fdd2833 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -3424,16 +3424,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RV32-NEXT: vredsum.vs v8, v8, v16 ; RV32-NEXT: vredsum.vs v9, v10, v16 ; RV32-NEXT: vredsum.vs v10, v12, v16 -; RV32-NEXT: vredsum.vs v11, v14, v16 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a2, v10 -; RV32-NEXT: vmv.x.s a3, v11 +; RV32-NEXT: vredsum.vs v8, v14, v16 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v8, a0 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 +; RV32-NEXT: vslide1up.vx v9, v8, a2 +; RV32-NEXT: vslide1up.vx v10, v9, a1 +; RV32-NEXT: vslide1up.vx v8, v10, a0 ; RV32-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_vredsum: @@ -3443,16 +3441,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RV64V-ONLY-NEXT: vredsum.vs v8, v8, v16 ; RV64V-ONLY-NEXT: vredsum.vs v9, v10, v16 ; RV64V-ONLY-NEXT: vredsum.vs v10, v12, v16 -; RV64V-ONLY-NEXT: vredsum.vs v11, v14, v16 ; RV64V-ONLY-NEXT: vmv.x.s a0, v8 ; RV64V-ONLY-NEXT: vmv.x.s a1, v9 ; RV64V-ONLY-NEXT: vmv.x.s a2, v10 -; RV64V-ONLY-NEXT: vmv.x.s a3, v11 +; RV64V-ONLY-NEXT: vredsum.vs v8, v14, v16 ; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64V-ONLY-NEXT: vmv.v.x v8, a0 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2 +; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1 +; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0 ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_vredsum: @@ -3502,16 +3498,14 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RV64ZVE32-NEXT: vredsum.vs v8, v8, v16 ; RV64ZVE32-NEXT: vredsum.vs v9, v10, v16 ; RV64ZVE32-NEXT: vredsum.vs v10, v12, v16 -; RV64ZVE32-NEXT: vredsum.vs v11, v14, v16 ; RV64ZVE32-NEXT: vmv.x.s a0, v8 ; RV64ZVE32-NEXT: vmv.x.s a1, v9 ; RV64ZVE32-NEXT: vmv.x.s a2, v10 -; RV64ZVE32-NEXT: vmv.x.s a3, v11 +; RV64ZVE32-NEXT: vredsum.vs v8, v14, v16 ; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32-NEXT: vmv.v.x v8, a0 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2 +; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1 +; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0 ; RV64ZVE32-NEXT: ret %247 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %arg0) %248 = insertelement <4 x i32> poison, i32 %247, i64 0 @@ -3531,16 +3525,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RV32-NEXT: vredmaxu.vs v8, 
v8, v8 ; RV32-NEXT: vredmaxu.vs v9, v10, v10 ; RV32-NEXT: vredmaxu.vs v10, v12, v12 -; RV32-NEXT: vredmaxu.vs v11, v14, v14 ; RV32-NEXT: vmv.x.s a0, v8 ; RV32-NEXT: vmv.x.s a1, v9 ; RV32-NEXT: vmv.x.s a2, v10 -; RV32-NEXT: vmv.x.s a3, v11 +; RV32-NEXT: vredmaxu.vs v8, v14, v14 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vmv.v.x v8, a0 -; RV32-NEXT: vslide1down.vx v8, v8, a1 -; RV32-NEXT: vslide1down.vx v8, v8, a2 -; RV32-NEXT: vslide1down.vx v8, v8, a3 +; RV32-NEXT: vslide1up.vx v9, v8, a2 +; RV32-NEXT: vslide1up.vx v10, v9, a1 +; RV32-NEXT: vslide1up.vx v8, v10, a0 ; RV32-NEXT: ret ; ; RV64V-ONLY-LABEL: buildvec_vredmax: @@ -3549,16 +3541,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RV64V-ONLY-NEXT: vredmaxu.vs v8, v8, v8 ; RV64V-ONLY-NEXT: vredmaxu.vs v9, v10, v10 ; RV64V-ONLY-NEXT: vredmaxu.vs v10, v12, v12 -; RV64V-ONLY-NEXT: vredmaxu.vs v11, v14, v14 ; RV64V-ONLY-NEXT: vmv.x.s a0, v8 ; RV64V-ONLY-NEXT: vmv.x.s a1, v9 ; RV64V-ONLY-NEXT: vmv.x.s a2, v10 -; RV64V-ONLY-NEXT: vmv.x.s a3, v11 +; RV64V-ONLY-NEXT: vredmaxu.vs v8, v14, v14 ; RV64V-ONLY-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64V-ONLY-NEXT: vmv.v.x v8, a0 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a1 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a2 -; RV64V-ONLY-NEXT: vslide1down.vx v8, v8, a3 +; RV64V-ONLY-NEXT: vslide1up.vx v9, v8, a2 +; RV64V-ONLY-NEXT: vslide1up.vx v10, v9, a1 +; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0 ; RV64V-ONLY-NEXT: ret ; ; RVA22U64-LABEL: buildvec_vredmax: @@ -3605,16 +3595,14 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RV64ZVE32-NEXT: vredmaxu.vs v8, v8, v8 ; RV64ZVE32-NEXT: vredmaxu.vs v9, v10, v10 ; RV64ZVE32-NEXT: vredmaxu.vs v10, v12, v12 -; RV64ZVE32-NEXT: vredmaxu.vs v11, v14, v14 ; RV64ZVE32-NEXT: vmv.x.s a0, v8 ; RV64ZVE32-NEXT: vmv.x.s a1, v9 ; RV64ZVE32-NEXT: vmv.x.s a2, v10 -; RV64ZVE32-NEXT: vmv.x.s a3, v11 +; RV64ZVE32-NEXT: vredmaxu.vs v8, v14, v14 ; RV64ZVE32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64ZVE32-NEXT: vmv.v.x v8, a0 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a1 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a2 -; RV64ZVE32-NEXT: vslide1down.vx v8, v8, a3 +; RV64ZVE32-NEXT: vslide1up.vx v9, v8, a2 +; RV64ZVE32-NEXT: vslide1up.vx v10, v9, a1 +; RV64ZVE32-NEXT: vslide1up.vx v8, v10, a0 ; RV64ZVE32-NEXT: ret %247 = tail call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %arg0) %248 = insertelement <4 x i32> poison, i32 %247, i64 0 diff --git a/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll index da912bf401ec0..821d4240827fb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll +++ b/llvm/test/CodeGen/RISCV/rvv/redundant-vfmvsf.ll @@ -9,12 +9,11 @@ define <2 x float> @redundant_vfmv(<2 x float> %arg0, <64 x float> %arg1, <64 x ; CHECK-NEXT: vfredusum.vs v9, v12, v8 ; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; CHECK-NEXT: vslidedown.vi v8, v8, 1 +; CHECK-NEXT: vfmv.f.s fa5, v9 ; CHECK-NEXT: vsetvli zero, a0, e32, m4, ta, ma -; CHECK-NEXT: vfredusum.vs v8, v16, v8 -; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: vfredusum.vs v9, v16, v8 ; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma -; CHECK-NEXT: vrgather.vi v8, v9, 0 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 +; CHECK-NEXT: vfslide1up.vf v8, v9, fa5 ; CHECK-NEXT: ret %s0 = extractelement <2 x float> %arg0, i64 0 %r0 = tail call reassoc float @llvm.vector.reduce.fadd.v64f32(float %s0, <64 x float> %arg1) From 217402a2664de789b43ad1709e4b9785cd418fd0 Mon Sep 17 
00:00:00 2001 From: Min-Yih Hsu Date: Wed, 20 Aug 2025 10:52:49 -0700 Subject: [PATCH 3/9] fixup! Generalize this into non-reduction operations as well --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 38 +++++++++---------- .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 16 ++++---- 2 files changed, 24 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index ce6fc8425856a..d7ce0dcbb4295 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4524,40 +4524,36 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // General case: splat the first operand and sliding other operands down one // by one to form a vector. Alternatively, if the last operand is an - // extraction from a reduction result, we can use the original vector + // extraction from element 0 of a vector, we can use the original vector // reduction result as the start value and slide up instead of slide down. // Such that we can avoid the splat. SmallVector Operands(Op->op_begin(), Op->op_end()); - SDValue Reduce; + SDValue EVec; bool SlideUp = false; // Find the first first non-undef from the tail. auto ItLastNonUndef = find_if(Operands.rbegin(), Operands.rend(), [](SDValue V) { return !V.isUndef(); }); if (ItLastNonUndef != Operands.rend()) { using namespace SDPatternMatch; - // Check if the last non-undef operand was extracted from a reduction. - for (unsigned Opc : - {RISCVISD::VECREDUCE_ADD_VL, RISCVISD::VECREDUCE_UMAX_VL, - RISCVISD::VECREDUCE_SMAX_VL, RISCVISD::VECREDUCE_UMIN_VL, - RISCVISD::VECREDUCE_SMIN_VL, RISCVISD::VECREDUCE_AND_VL, - RISCVISD::VECREDUCE_OR_VL, RISCVISD::VECREDUCE_XOR_VL, - RISCVISD::VECREDUCE_FADD_VL, RISCVISD::VECREDUCE_SEQ_FADD_VL, - RISCVISD::VECREDUCE_FMAX_VL, RISCVISD::VECREDUCE_FMIN_VL}) { - SlideUp = sd_match( - *ItLastNonUndef, - m_ExtractElt(m_AllOf(m_Opc(Opc), m_Value(Reduce)), m_Zero())); - if (SlideUp) - break; - } + // Check if the last non-undef operand was an extraction. + SlideUp = sd_match(*ItLastNonUndef, m_ExtractElt(m_Value(EVec), m_Zero())); } if (SlideUp) { - // Adapt Reduce's type into ContainerVT. - if (Reduce.getValueType().getVectorMinNumElements() < + MVT EVecContainerVT = EVec.getSimpleValueType(); + // Make sure the original vector has scalable vector type. + if (EVecContainerVT.isFixedLengthVector()) { + EVecContainerVT = + getContainerForFixedLengthVector(DAG, EVecContainerVT, Subtarget); + EVec = convertToScalableVector(EVecContainerVT, EVec, DAG, Subtarget); + } + + // Adapt EVec's type into ContainerVT. + if (EVecContainerVT.getVectorMinNumElements() < ContainerVT.getVectorMinNumElements()) - Reduce = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), Reduce, 0); + EVec = DAG.getInsertSubvector(DL, DAG.getUNDEF(ContainerVT), EVec, 0); else - Reduce = DAG.getExtractSubvector(DL, ContainerVT, Reduce, 0); + EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0); // Reverse the elements as we're going to slide up from the last element. for (unsigned i = 0U, N = Operands.size(), H = divideCeil(N, 2); i < H; ++i) @@ -4577,7 +4573,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // prior value of our temporary register. 
if (!Vec) { if (SlideUp) { - Vec = Reduce; + Vec = EVec; } else { Vec = DAG.getSplatVector(VT, DL, V); Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index 972e565ba8657..face756283c33 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1833,15 +1833,13 @@ define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, d ; CHECK-LABEL: buildvec_slideup: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa6 +; CHECK-NEXT: vfslide1up.vf v8, v10, fa5 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa4 +; CHECK-NEXT: vfslide1up.vf v8, v10, fa3 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa2 +; CHECK-NEXT: vfslide1up.vf v12, v10, fa1 +; CHECK-NEXT: vfslide1up.vf v8, v12, fa0 ; CHECK-NEXT: ret %v0 = insertelement <8 x double> poison, double %e0, i64 0 %v1 = insertelement <8 x double> %v0, double %e1, i64 1 From 3dec8ff38e69fa1400f30f54b58cc2aa92aade6f Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Wed, 20 Aug 2025 13:41:14 -0700 Subject: [PATCH 4/9] fixup! Use std::reverse --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index d7ce0dcbb4295..f358c3e77e1db 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4556,8 +4556,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, EVec = DAG.getExtractSubvector(DL, ContainerVT, EVec, 0); // Reverse the elements as we're going to slide up from the last element. - for (unsigned i = 0U, N = Operands.size(), H = divideCeil(N, 2); i < H; ++i) - std::swap(Operands[i], Operands[N - 1 - i]); + std::reverse(Operands.begin(), Operands.end()); } SDValue Vec; From 1d8b13ead5ed41753cbf06c1aedd6e02eca63c68 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Wed, 20 Aug 2025 14:37:41 -0700 Subject: [PATCH 5/9] Update llvm/lib/Target/RISCV/RISCVISelLowering.cpp Co-authored-by: Craig Topper --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index f358c3e77e1db..76ed91f26599b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4522,7 +4522,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Mask, VL, Policy); }; - // General case: splat the first operand and sliding other operands down one + // General case: splat the first operand and slide other operands down one // by one to form a vector. Alternatively, if the last operand is an // extraction from element 0 of a vector, we can use the original vector // reduction result as the start value and slide up instead of slide down. 
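
To make the comment above concrete, here is a minimal IR sketch of the kind of build_vector this lowering targets. It is a hypothetical, reduced variant (four elements, made-up function and value names) of the buildvec_slideup and buildvec_vfredusum tests added earlier in this series, which remain the authoritative examples; every element comes from lane 0 of some vector, and the vector feeding the last element can seed the result so the remaining values are inserted with vfslide1up.vf instead of splatting the first element and chaining vfslide1down.vf:

; Illustrative sketch only (not part of the in-tree tests).
; Each build_vector element is an extract of lane 0 of a source vector.
define <4 x double> @slideup_sketch(<2 x double> %a, <2 x double> %b, <2 x double> %c, <2 x double> %d) {
  %e0 = extractelement <2 x double> %a, i64 0
  %e1 = extractelement <2 x double> %b, i64 0
  %e2 = extractelement <2 x double> %c, i64 0
  %e3 = extractelement <2 x double> %d, i64 0
  ; The last element (%e3) is lane 0 of %d, so lowering can start from %d
  ; and slide %e2, %e1, %e0 up, rather than splatting %e0 and sliding down.
  %v0 = insertelement <4 x double> poison, double %e0, i64 0
  %v1 = insertelement <4 x double> %v0, double %e1, i64 1
  %v2 = insertelement <4 x double> %v1, double %e2, i64 2
  %v3 = insertelement <4 x double> %v2, double %e3, i64 3
  ret <4 x double> %v3
}

Seeding the result from the source vector avoids the initial splat, and a vfslide1up of a lane-0 extract can later be folded into a plain vslideup, eliminating the vector-to-scalar move; as a later patch in this series notes, the trade-off is potentially higher register pressure, which is why the final form of the change only fires when every operand is such an extract.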
From ac83561053cc1056f20294cf544c8477874a8f5a Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Thu, 21 Aug 2025 11:05:24 -0700 Subject: [PATCH 6/9] fixup! Address review comments Co-Authored-By: Luke Lau --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 7 +++--- .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 22 +++++++++++++++++++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 76ed91f26599b..4e697220cacee 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4524,9 +4524,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // General case: splat the first operand and slide other operands down one // by one to form a vector. Alternatively, if the last operand is an - // extraction from element 0 of a vector, we can use the original vector - // reduction result as the start value and slide up instead of slide down. - // Such that we can avoid the splat. + // extraction from element 0 of a vector, we can use that vector as the start + // value and slide up instead of slide down. Such that we can avoid the splat. SmallVector Operands(Op->op_begin(), Op->op_end()); SDValue EVec; bool SlideUp = false; @@ -4567,7 +4566,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, continue; } - // Start our sequence with either a TA splat or a reduction result in the + // Start our sequence with either a TA splat or extract source in the // hopes that hardware is able to recognize there's no dependency on the // prior value of our temporary register. if (!Vec) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index face756283c33..b98a8521964e7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1853,6 +1853,28 @@ define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, d ret <8 x double> %v7 } +define <8 x double> @buildvec_slideup_trailing_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) { +; CHECK-LABEL: buildvec_slideup_trailing_undef: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma +; CHECK-NEXT: vfslide1up.vf v10, v8, fa4 +; CHECK-NEXT: vfslide1up.vf v8, v10, fa3 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa2 +; CHECK-NEXT: vfslide1up.vf v12, v10, fa1 +; CHECK-NEXT: vfslide1up.vf v8, v12, fa0 +; CHECK-NEXT: ret + %v0 = insertelement <8 x double> poison, double %e0, i64 0 + %v1 = insertelement <8 x double> %v0, double %e1, i64 1 + %v2 = insertelement <8 x double> %v1, double %e2, i64 2 + %v3 = insertelement <8 x double> %v2, double %e3, i64 3 + %v4 = insertelement <8 x double> %v3, double %e4, i64 4 + %e5 = extractelement <4 x double> %v, i64 0 + %v5 = insertelement <8 x double> %v4, double %e5, i64 5 + %v6 = insertelement <8 x double> %v5, double poison, i64 6 + %v7 = insertelement <8 x double> %v6, double poison, i64 7 + ret <8 x double> %v7 +} + ; Negative test for slideup lowering where the extract_element was not build_vector's last operand. 
define <8 x double> @buildvec_slideup_not_last_element(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e7) vscale_range(4, 128) { ; CHECK-LABEL: buildvec_slideup_not_last_element: From ed3f4566bc3bad7be9ea686413a5f762e4af27b3 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Fri, 22 Aug 2025 14:20:02 -0700 Subject: [PATCH 7/9] fixup! Address review comments and add more tests --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 34 +++++++-------- .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 42 +++++++++++++++++++ 2 files changed, 59 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 4e697220cacee..86958201f4921 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4512,15 +4512,6 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, "Illegal type which will result in reserved encoding"); const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC; - auto getVSlide = [&](bool SlideUp, EVT ContainerVT, SDValue Passthru, - SDValue Vec, SDValue Offset, SDValue Mask, - SDValue VL) -> SDValue { - if (SlideUp) - return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset, - Mask, VL, Policy); - return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset, - Mask, VL, Policy); - }; // General case: splat the first operand and slide other operands down one // by one to form a vector. Alternatively, if the last operand is an @@ -4529,6 +4520,15 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, SmallVector Operands(Op->op_begin(), Op->op_end()); SDValue EVec; bool SlideUp = false; + auto getVSlide = [&](EVT ContainerVT, SDValue Passthru, SDValue Vec, + SDValue Offset, SDValue Mask, SDValue VL) -> SDValue { + if (SlideUp) + return getVSlideup(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset, + Mask, VL, Policy); + return getVSlidedown(DAG, Subtarget, DL, ContainerVT, Passthru, Vec, Offset, + Mask, VL, Policy); + }; + // Find the first first non-undef from the tail. auto ItLastNonUndef = find_if(Operands.rbegin(), Operands.rend(), [](SDValue V) { return !V.isUndef(); }); @@ -4583,26 +4583,26 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (UndefCount) { const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); - Vec = getVSlide(SlideUp, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, - Offset, Mask, VL); + Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask, + VL); UndefCount = 0; } - unsigned OpCode; + unsigned Opcode; if (VT.isFloatingPoint()) - OpCode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL; + Opcode = SlideUp ? RISCVISD::VFSLIDE1UP_VL : RISCVISD::VFSLIDE1DOWN_VL; else - OpCode = SlideUp ? RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL; + Opcode = SlideUp ? 
RISCVISD::VSLIDE1UP_VL : RISCVISD::VSLIDE1DOWN_VL; if (!VT.isFloatingPoint()) V = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), V); - Vec = DAG.getNode(OpCode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, + Vec = DAG.getNode(Opcode, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, V, Mask, VL); } if (UndefCount) { const SDValue Offset = DAG.getConstant(UndefCount, DL, Subtarget.getXLenVT()); - Vec = getVSlide(SlideUp, ContainerVT, DAG.getUNDEF(ContainerVT), Vec, - Offset, Mask, VL); + Vec = getVSlide(ContainerVT, DAG.getUNDEF(ContainerVT), Vec, Offset, Mask, + VL); } return convertFromScalableVector(VT, Vec, DAG, Subtarget); } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index b98a8521964e7..cd67fc31119f7 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1853,6 +1853,48 @@ define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, d ret <8 x double> %v7 } +define <8 x double> @buildvec_slideup_leading_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) { +; CHECK-LABEL: buildvec_slideup_leading_undef: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma +; CHECK-NEXT: vfslide1up.vf v10, v8, fa4 +; CHECK-NEXT: vfslide1up.vf v8, v10, fa3 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa2 +; CHECK-NEXT: vfslide1up.vf v8, v10, fa1 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: ret + %v2 = insertelement <8 x double> poison, double %e0, i64 2 + %v3 = insertelement <8 x double> %v2, double %e1, i64 3 + %v4 = insertelement <8 x double> %v3, double %e2, i64 4 + %v5 = insertelement <8 x double> %v4, double %e3, i64 5 + %v6 = insertelement <8 x double> %v5, double %e4, i64 6 + %e5 = extractelement <4 x double> %v, i64 0 + %v7 = insertelement <8 x double> %v6, double %e5, i64 7 + ret <8 x double> %v7 +} + +define <8 x double> @buildvec_slideup_mid_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) { +; CHECK-LABEL: buildvec_slideup_mid_undef: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma +; CHECK-NEXT: vfslide1up.vf v10, v8, fa4 +; CHECK-NEXT: vfslide1up.vf v8, v10, fa3 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa2 +; CHECK-NEXT: vslideup.vi v8, v10, 2 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa1 +; CHECK-NEXT: vfslide1up.vf v8, v10, fa0 +; CHECK-NEXT: ret + %v0 = insertelement <8 x double> poison, double %e0, i64 0 + %v1 = insertelement <8 x double> %v0, double %e1, i64 1 + %v4 = insertelement <8 x double> %v1, double %e2, i64 4 + %v5 = insertelement <8 x double> %v4, double %e3, i64 5 + %v6 = insertelement <8 x double> %v5, double %e4, i64 6 + %e5 = extractelement <4 x double> %v, i64 0 + %v7 = insertelement <8 x double> %v6, double %e5, i64 7 + ret <8 x double> %v7 +} + define <8 x double> @buildvec_slideup_trailing_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) { ; CHECK-LABEL: buildvec_slideup_trailing_undef: ; CHECK: # %bb.0: From 41f1a97069a224e719e819a3ea9ac8857334ffc1 Mon Sep 17 00:00:00 2001 From: Min-Yih Hsu Date: Thu, 28 Aug 2025 11:14:55 -0700 Subject: [PATCH 8/9] fixup! 
Limit the condition to build_vector with *all* operands being extraction from first element --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 26 +- .../RISCV/rvv/fixed-vectors-fp-buildvec.ll | 233 +++++++++--------- .../RISCV/rvv/fixed-vectors-int-buildvec.ll | 24 +- 3 files changed, 143 insertions(+), 140 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 86958201f4921..562fbae776051 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4514,9 +4514,15 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const unsigned Policy = RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC; // General case: splat the first operand and slide other operands down one - // by one to form a vector. Alternatively, if the last operand is an - // extraction from element 0 of a vector, we can use that vector as the start - // value and slide up instead of slide down. Such that we can avoid the splat. + // by one to form a vector. Alternatively, if every operand is an + // extraction from element 0 of a vector, we use that vector from the last + // extraction as the start value and slide up instead of slide down. Such that + // (1) we can avoid the initial splat (2) we can turn those vslide1up into + // vslideup of 1 later and eliminate the vector to scalar movement, which is + // something we cannot do with vslide1down/vslidedown. + // Of course, using vslide1up/vslideup might increase the register pressure, + // and that's why we conservatively limit to cases where every operands is an + // extraction from first element. SmallVector Operands(Op->op_begin(), Op->op_end()); SDValue EVec; bool SlideUp = false; @@ -4529,13 +4535,15 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, Mask, VL, Policy); }; - // Find the first first non-undef from the tail. - auto ItLastNonUndef = find_if(Operands.rbegin(), Operands.rend(), - [](SDValue V) { return !V.isUndef(); }); - if (ItLastNonUndef != Operands.rend()) { + // The reason we don't use all_of here is because we're also capturing EVec + // from the last non-undef operand. If the std::execution_policy of the + // underlying std::all_of is anything but std::sequenced_policy we might + // capture the wrong EVec. + for (SDValue V : Operands) { using namespace SDPatternMatch; - // Check if the last non-undef operand was an extraction. 
- SlideUp = sd_match(*ItLastNonUndef, m_ExtractElt(m_Value(EVec), m_Zero())); + SlideUp = V.isUndef() || sd_match(V, m_ExtractElt(m_Value(EVec), m_Zero())); + if (!SlideUp) + break; } if (SlideUp) { diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll index cd67fc31119f7..aa3b9abe3a7aa 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-buildvec.ll @@ -1829,123 +1829,8 @@ define <8 x double> @buildvec_v8f64_zvl512(double %e0, double %e1, double %e2, d ret <8 x double> %v7 } -define <8 x double> @buildvec_slideup(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e6) vscale_range(4, 128) { -; CHECK-LABEL: buildvec_slideup: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma -; CHECK-NEXT: vfslide1up.vf v10, v8, fa6 -; CHECK-NEXT: vfslide1up.vf v8, v10, fa5 -; CHECK-NEXT: vfslide1up.vf v10, v8, fa4 -; CHECK-NEXT: vfslide1up.vf v8, v10, fa3 -; CHECK-NEXT: vfslide1up.vf v10, v8, fa2 -; CHECK-NEXT: vfslide1up.vf v12, v10, fa1 -; CHECK-NEXT: vfslide1up.vf v8, v12, fa0 -; CHECK-NEXT: ret - %v0 = insertelement <8 x double> poison, double %e0, i64 0 - %v1 = insertelement <8 x double> %v0, double %e1, i64 1 - %v2 = insertelement <8 x double> %v1, double %e2, i64 2 - %v3 = insertelement <8 x double> %v2, double %e3, i64 3 - %v4 = insertelement <8 x double> %v3, double %e4, i64 4 - %v5 = insertelement <8 x double> %v4, double %e5, i64 5 - %v6 = insertelement <8 x double> %v5, double %e6, i64 6 - %e7 = extractelement <4 x double> %v, i64 0 - %v7 = insertelement <8 x double> %v6, double %e7, i64 7 - ret <8 x double> %v7 -} - -define <8 x double> @buildvec_slideup_leading_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) { -; CHECK-LABEL: buildvec_slideup_leading_undef: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma -; CHECK-NEXT: vfslide1up.vf v10, v8, fa4 -; CHECK-NEXT: vfslide1up.vf v8, v10, fa3 -; CHECK-NEXT: vfslide1up.vf v10, v8, fa2 -; CHECK-NEXT: vfslide1up.vf v8, v10, fa1 -; CHECK-NEXT: vfslide1up.vf v10, v8, fa0 -; CHECK-NEXT: vslideup.vi v8, v10, 2 -; CHECK-NEXT: ret - %v2 = insertelement <8 x double> poison, double %e0, i64 2 - %v3 = insertelement <8 x double> %v2, double %e1, i64 3 - %v4 = insertelement <8 x double> %v3, double %e2, i64 4 - %v5 = insertelement <8 x double> %v4, double %e3, i64 5 - %v6 = insertelement <8 x double> %v5, double %e4, i64 6 - %e5 = extractelement <4 x double> %v, i64 0 - %v7 = insertelement <8 x double> %v6, double %e5, i64 7 - ret <8 x double> %v7 -} - -define <8 x double> @buildvec_slideup_mid_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) { -; CHECK-LABEL: buildvec_slideup_mid_undef: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma -; CHECK-NEXT: vfslide1up.vf v10, v8, fa4 -; CHECK-NEXT: vfslide1up.vf v8, v10, fa3 -; CHECK-NEXT: vfslide1up.vf v10, v8, fa2 -; CHECK-NEXT: vslideup.vi v8, v10, 2 -; CHECK-NEXT: vfslide1up.vf v10, v8, fa1 -; CHECK-NEXT: vfslide1up.vf v8, v10, fa0 -; CHECK-NEXT: ret - %v0 = insertelement <8 x double> poison, double %e0, i64 0 - %v1 = insertelement <8 x double> %v0, double %e1, i64 1 - %v4 = insertelement <8 x double> %v1, double %e2, i64 4 - %v5 = insertelement <8 x double> %v4, double %e3, i64 5 - %v6 = insertelement <8 x double> %v5, double %e4, i64 6 - %e5 = extractelement <4 x 
double> %v, i64 0 - %v7 = insertelement <8 x double> %v6, double %e5, i64 7 - ret <8 x double> %v7 -} - -define <8 x double> @buildvec_slideup_trailing_undef(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4) vscale_range(4, 128) { -; CHECK-LABEL: buildvec_slideup_trailing_undef: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma -; CHECK-NEXT: vfslide1up.vf v10, v8, fa4 -; CHECK-NEXT: vfslide1up.vf v8, v10, fa3 -; CHECK-NEXT: vfslide1up.vf v10, v8, fa2 -; CHECK-NEXT: vfslide1up.vf v12, v10, fa1 -; CHECK-NEXT: vfslide1up.vf v8, v12, fa0 -; CHECK-NEXT: ret - %v0 = insertelement <8 x double> poison, double %e0, i64 0 - %v1 = insertelement <8 x double> %v0, double %e1, i64 1 - %v2 = insertelement <8 x double> %v1, double %e2, i64 2 - %v3 = insertelement <8 x double> %v2, double %e3, i64 3 - %v4 = insertelement <8 x double> %v3, double %e4, i64 4 - %e5 = extractelement <4 x double> %v, i64 0 - %v5 = insertelement <8 x double> %v4, double %e5, i64 5 - %v6 = insertelement <8 x double> %v5, double poison, i64 6 - %v7 = insertelement <8 x double> %v6, double poison, i64 7 - ret <8 x double> %v7 -} - -; Negative test for slideup lowering where the extract_element was not build_vector's last operand. -define <8 x double> @buildvec_slideup_not_last_element(<4 x double> %v, double %e0, double %e1, double %e2, double %e3, double %e4, double %e5, double %e7) vscale_range(4, 128) { -; CHECK-LABEL: buildvec_slideup_not_last_element: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 8, e64, m2, ta, ma -; CHECK-NEXT: vfmv.f.s ft0, v8 -; CHECK-NEXT: vfmv.v.f v8, fa0 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa1 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa2 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa3 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 -; CHECK-NEXT: vfslide1down.vf v8, v8, ft0 -; CHECK-NEXT: vfslide1down.vf v8, v8, fa6 -; CHECK-NEXT: ret - %v0 = insertelement <8 x double> poison, double %e0, i64 0 - %v1 = insertelement <8 x double> %v0, double %e1, i64 1 - %v2 = insertelement <8 x double> %v1, double %e2, i64 2 - %v3 = insertelement <8 x double> %v2, double %e3, i64 3 - %v4 = insertelement <8 x double> %v3, double %e4, i64 4 - %v5 = insertelement <8 x double> %v4, double %e5, i64 5 - %e6 = extractelement <4 x double> %v, i64 0 - %v6 = insertelement <8 x double> %v5, double %e6, i64 6 - %v7 = insertelement <8 x double> %v6, double %e7, i64 7 - ret <8 x double> %v7 -} - -define <4 x float> @buildvec_vfredusum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind { -; CHECK-LABEL: buildvec_vfredusum: +define <4 x float> @buildvec_vfredusum_slideup(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind { +; CHECK-LABEL: buildvec_vfredusum_slideup: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 @@ -1972,8 +1857,118 @@ define <4 x float> @buildvec_vfredusum(float %start, <8 x float> %arg1, <8 x flo ret <4 x float> %255 } -define <4 x float> @buildvec_vfredosum(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind { -; CHECK-LABEL: buildvec_vfredosum: +define <8 x float> @buildvec_vfredusum_slideup_leading_undef(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind { +; CHECK-LABEL: buildvec_vfredusum_slideup_leading_undef: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: 
vfmv.s.f v16, fa0 +; CHECK-NEXT: vfredusum.vs v9, v8, v16 +; CHECK-NEXT: vfredusum.vs v10, v10, v16 +; CHECK-NEXT: vfredusum.vs v11, v12, v16 +; CHECK-NEXT: vfredusum.vs v8, v14, v16 +; CHECK-NEXT: vfmv.f.s fa5, v9 +; CHECK-NEXT: vfmv.f.s fa4, v10 +; CHECK-NEXT: vfmv.f.s fa3, v11 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa3 +; CHECK-NEXT: vfslide1up.vf v8, v10, fa4 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa5 +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: ret + %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1) + %253 = insertelement <8 x float> poison, float %252, i64 4 + %254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2) + %255 = insertelement <8 x float> %253, float %254, i64 5 + %256 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3) + %257 = insertelement <8 x float> %255, float %256, i64 6 + %258 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4) + %259 = insertelement <8 x float> %257, float %258, i64 7 + ret <8 x float> %259 +} + +define <8 x float> @buildvec_vfredusum_slideup_trailing_undef(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind { +; CHECK-LABEL: buildvec_vfredusum_slideup_trailing_undef: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vfredusum.vs v9, v8, v16 +; CHECK-NEXT: vfredusum.vs v10, v10, v16 +; CHECK-NEXT: vfredusum.vs v11, v12, v16 +; CHECK-NEXT: vfredusum.vs v8, v14, v16 +; CHECK-NEXT: vfmv.f.s fa5, v9 +; CHECK-NEXT: vfmv.f.s fa4, v10 +; CHECK-NEXT: vfmv.f.s fa3, v11 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa3 +; CHECK-NEXT: vfslide1up.vf v12, v10, fa4 +; CHECK-NEXT: vfslide1up.vf v8, v12, fa5 +; CHECK-NEXT: ret + %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1) + %253 = insertelement <8 x float> poison, float %252, i64 0 + %254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2) + %255 = insertelement <8 x float> %253, float %254, i64 1 + %256 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3) + %257 = insertelement <8 x float> %255, float %256, i64 2 + %258 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4) + %259 = insertelement <8 x float> %257, float %258, i64 3 + ret <8 x float> %259 +} + +; Negative test case checking if we generate slideup only when all build_vec operands are extraction from the first vector element. 
+define <8 x float> @buildvec_vfredusum_slideup_not_extract_first(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind { +; CHECK-LABEL: buildvec_vfredusum_slideup_not_extract_first: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v10, fa0 +; CHECK-NEXT: vfredusum.vs v8, v8, v10 +; CHECK-NEXT: vfredusum.vs v9, v12, v10 +; CHECK-NEXT: vfredusum.vs v10, v14, v10 +; CHECK-NEXT: vfmv.f.s fa5, v9 +; CHECK-NEXT: vfmv.f.s fa4, v10 +; CHECK-NEXT: vrgather.vi v10, v8, 0 +; CHECK-NEXT: vfslide1down.vf v8, v10, fa0 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa5 +; CHECK-NEXT: vfslide1down.vf v8, v8, fa4 +; CHECK-NEXT: vslidedown.vi v8, v8, 4 +; CHECK-NEXT: ret + %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1) + %253 = insertelement <8 x float> poison, float %252, i64 0 + %255 = insertelement <8 x float> %253, float %start, i64 1 + %256 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3) + %257 = insertelement <8 x float> %255, float %256, i64 2 + %258 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4) + %259 = insertelement <8 x float> %257, float %258, i64 3 + ret <8 x float> %259 +} + +define <8 x float> @buildvec_vfredusum_slideup_mid_undef(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind { +; CHECK-LABEL: buildvec_vfredusum_slideup_mid_undef: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v16, fa0 +; CHECK-NEXT: vfredusum.vs v9, v8, v16 +; CHECK-NEXT: vfredusum.vs v10, v10, v16 +; CHECK-NEXT: vfredusum.vs v11, v12, v16 +; CHECK-NEXT: vfredusum.vs v8, v14, v16 +; CHECK-NEXT: vfmv.f.s fa5, v9 +; CHECK-NEXT: vfmv.f.s fa4, v10 +; CHECK-NEXT: vfmv.f.s fa3, v11 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa3 +; CHECK-NEXT: vslideup.vi v8, v10, 4 +; CHECK-NEXT: vfslide1up.vf v10, v8, fa4 +; CHECK-NEXT: vfslide1up.vf v8, v10, fa5 +; CHECK-NEXT: ret + %252 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg1) + %253 = insertelement <8 x float> poison, float %252, i64 0 + %254 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg2) + %255 = insertelement <8 x float> %253, float %254, i64 1 + %256 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg3) + %257 = insertelement <8 x float> %255, float %256, i64 6 + %258 = tail call reassoc float @llvm.vector.reduce.fadd.v8f32(float %start, <8 x float> %arg4) + %259 = insertelement <8 x float> %257, float %258, i64 7 + ret <8 x float> %259 +} + +define <4 x float> @buildvec_vfredosum_slideup(float %start, <8 x float> %arg1, <8 x float> %arg2, <8 x float> %arg3, <8 x float> %arg4) nounwind { +; CHECK-LABEL: buildvec_vfredosum_slideup: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; CHECK-NEXT: vfmv.s.f v16, fa0 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll index a02117fdd2833..eedf19c38766b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-buildvec.ll @@ -3416,8 +3416,8 @@ define <4 x i1> @buildvec_i1_splat(i1 %e1) { ret <4 x i1> %v4 } -define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind { -; RV32-LABEL: buildvec_vredsum: +define <4 
x i32> @buildvec_vredsum_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind { +; RV32-LABEL: buildvec_vredsum_slideup: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vmv.s.x v16, zero @@ -3434,7 +3434,7 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RV32-NEXT: vslide1up.vx v8, v10, a0 ; RV32-NEXT: ret ; -; RV64V-ONLY-LABEL: buildvec_vredsum: +; RV64V-ONLY-LABEL: buildvec_vredsum_slideup: ; RV64V-ONLY: # %bb.0: ; RV64V-ONLY-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64V-ONLY-NEXT: vmv.s.x v16, zero @@ -3451,7 +3451,7 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0 ; RV64V-ONLY-NEXT: ret ; -; RVA22U64-LABEL: buildvec_vredsum: +; RVA22U64-LABEL: buildvec_vredsum_slideup: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RVA22U64-NEXT: vmv.s.x v16, zero @@ -3472,7 +3472,7 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RVA22U64-NEXT: vslide1down.vx v8, v8, a1 ; RVA22U64-NEXT: ret ; -; RVA22U64-PACK-LABEL: buildvec_vredsum: +; RVA22U64-PACK-LABEL: buildvec_vredsum_slideup: ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RVA22U64-PACK-NEXT: vmv.s.x v16, zero @@ -3491,7 +3491,7 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a1 ; RVA22U64-PACK-NEXT: ret ; -; RV64ZVE32-LABEL: buildvec_vredsum: +; RV64ZVE32-LABEL: buildvec_vredsum_slideup: ; RV64ZVE32: # %bb.0: ; RV64ZVE32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64ZVE32-NEXT: vmv.s.x v16, zero @@ -3518,8 +3518,8 @@ define <4 x i32> @buildvec_vredsum(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ret <4 x i32> %255 } -define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind { -; RV32-LABEL: buildvec_vredmax: +define <4 x i32> @buildvec_vredmax_slideup(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> %arg2, <8 x i32> %arg3) nounwind { +; RV32-LABEL: buildvec_vredmax_slideup: ; RV32: # %bb.0: ; RV32-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV32-NEXT: vredmaxu.vs v8, v8, v8 @@ -3535,7 +3535,7 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RV32-NEXT: vslide1up.vx v8, v10, a0 ; RV32-NEXT: ret ; -; RV64V-ONLY-LABEL: buildvec_vredmax: +; RV64V-ONLY-LABEL: buildvec_vredmax_slideup: ; RV64V-ONLY: # %bb.0: ; RV64V-ONLY-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RV64V-ONLY-NEXT: vredmaxu.vs v8, v8, v8 @@ -3551,7 +3551,7 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RV64V-ONLY-NEXT: vslide1up.vx v8, v10, a0 ; RV64V-ONLY-NEXT: ret ; -; RVA22U64-LABEL: buildvec_vredmax: +; RVA22U64-LABEL: buildvec_vredmax_slideup: ; RVA22U64: # %bb.0: ; RVA22U64-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RVA22U64-NEXT: vredmaxu.vs v8, v8, v8 @@ -3571,7 +3571,7 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RVA22U64-NEXT: vslide1down.vx v8, v8, a1 ; RVA22U64-NEXT: ret ; -; RVA22U64-PACK-LABEL: buildvec_vredmax: +; RVA22U64-PACK-LABEL: buildvec_vredmax_slideup: ; RVA22U64-PACK: # %bb.0: ; RVA22U64-PACK-NEXT: vsetivli zero, 8, e32, m2, ta, ma ; RVA22U64-PACK-NEXT: vredmaxu.vs v8, v8, v8 @@ -3589,7 +3589,7 @@ define <4 x i32> @buildvec_vredmax(<8 x i32> %arg0, <8 x i32> %arg1, <8 x i32> % ; RVA22U64-PACK-NEXT: vslide1down.vx v8, v8, a1 ; RVA22U64-PACK-NEXT: ret 
;
-; RV64ZVE32-LABEL: buildvec_vredmax:
+; RV64ZVE32-LABEL: buildvec_vredmax_slideup:
; RV64ZVE32: # %bb.0:
; RV64ZVE32-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; RV64ZVE32-NEXT: vredmaxu.vs v8, v8, v8

From 83f17473e3a6967cfc7f54f445d12779536104f8 Mon Sep 17 00:00:00 2001
From: Min-Yih Hsu
Date: Tue, 2 Sep 2025 09:42:42 -0700
Subject: [PATCH 9/9] fixup! Fix code comments

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 562fbae776051..2149739443650 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4521,8 +4521,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
 // vslideup of 1 later and eliminate the vector to scalar movement, which is
 // something we cannot do with vslide1down/vslidedown.
 // Of course, using vslide1up/vslideup might increase the register pressure,
-// and that's why we conservatively limit to cases where every operands is an
-// extraction from first element.
+// and that's why we conservatively limit to cases where every operand is an
+// extraction from the first element.
 SmallVector<SDValue> Operands(Op->op_begin(), Op->op_end());
 SDValue EVec;
 bool SlideUp = false;