From ab283cbe261f3627c79b62de2611675999bb2d3d Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Thu, 6 Feb 2025 09:33:39 -0800 Subject: [PATCH 1/2] [RISCV] Allow undef prefix for local repeating VLA shuffle lowering Implement the first TODO from #125735, and make a minor cleanup of the code, using the same style as the recently landed strict prefix case. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 17 ++++--- .../RISCV/rvv/fixed-vectors-shuffle-rotate.ll | 49 ++++++++++++++++--- 2 files changed, 52 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 165c71d8e03f1..ec74523a31227 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5338,13 +5338,19 @@ static SDValue lowerDisjointIndicesShuffle(ShuffleVectorSDNode *SVN, /// Is this mask local (i.e. elements only move within their local span), and /// repeating (that is, the same rearrangement is being done within each span)? static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) { - // TODO: Could improve the case where undef elements exist in the first span. + SmallVector<int> LowSpan(Span, -1); for (auto [I, M] : enumerate(Mask)) { if (M == -1) continue; - int ChunkLo = I - (I % Span); + int SpanIdx = I % Span; + int ChunkLo = I - SpanIdx; int ChunkHi = ChunkLo + Span; - if (M < ChunkLo || M >= ChunkHi || M - ChunkLo != Mask[I % Span]) + if (M < ChunkLo || M >= ChunkHi) + return false; + int Expected = M - ChunkLo; + if (LowSpan[SpanIdx] == -1) + LowSpan[SpanIdx] = Expected; + if (LowSpan[SpanIdx] != Expected) return false; } return true; @@ -5742,9 +5748,8 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, // If we have a locally repeating mask, then we can reuse the first register // in the index register group for all registers within the source register // group. TODO: This generalizes to m2, and m4. 
- const MVT M1VT = getLMUL1VT(ContainerVT); - auto VLMAX = RISCVTargetLowering::computeVLMAXBounds(M1VT, Subtarget).first; - if (ContainerVT.bitsGT(M1VT) && isLocalRepeatingShuffle(Mask, VLMAX)) { + if (NumElts > MinVLMAX && isLocalRepeatingShuffle(Mask, MinVLMAX)) { + const MVT M1VT = getLMUL1VT(ContainerVT); EVT SubIndexVT = M1VT.changeVectorElementType(IndexVT.getScalarType()); SDValue SubIndex = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubIndexVT, LHSIndices, diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll index 86d8a275a9055..c9fe39685fbc6 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-rotate.ll @@ -969,11 +969,44 @@ define <8 x i64> @shuffle_v8i64_as_i128(<8 x i64> %v) { ret <8 x i64> %shuffle } -define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) { -; CHECK-LABEL: shuffle_v8i64_as_i256: +; Test case where first span has undefs +define <8 x i64> @shuffle_v8i64_as_i128_2(<8 x i64> %v) { +; CHECK-LABEL: shuffle_v8i64_as_i128_2: ; CHECK: # %bb.0: ; CHECK-NEXT: lui a0, %hi(.LCPI30_0) ; CHECK-NEXT: addi a0, a0, %lo(.LCPI30_0) +; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-NEXT: vle16.v v16, (a0) +; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; CHECK-NEXT: vrgatherei16.vv v13, v9, v16 +; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 +; CHECK-NEXT: vrgatherei16.vv v14, v10, v16 +; CHECK-NEXT: vrgatherei16.vv v15, v11, v16 +; CHECK-NEXT: vmv4r.v v8, v12 +; CHECK-NEXT: ret +; +; ZVKB-V-LABEL: shuffle_v8i64_as_i128_2: +; ZVKB-V: # %bb.0: +; ZVKB-V-NEXT: lui a0, %hi(.LCPI30_0) +; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI30_0) +; ZVKB-V-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; ZVKB-V-NEXT: vle16.v v16, (a0) +; ZVKB-V-NEXT: vsetvli a0, zero, e64, m1, ta, ma +; ZVKB-V-NEXT: vrgatherei16.vv v13, v9, v16 +; ZVKB-V-NEXT: vrgatherei16.vv v12, v8, v16 +; ZVKB-V-NEXT: vrgatherei16.vv v14, v10, v16 +; 
ZVKB-V-NEXT: vrgatherei16.vv v15, v11, v16 +; ZVKB-V-NEXT: vmv4r.v v8, v12 +; ZVKB-V-NEXT: ret + %shuffle = shufflevector <8 x i64> %v, <8 x i64> poison, <8 x i32> + ret <8 x i64> %shuffle +} + +define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) { +; CHECK-LABEL: shuffle_v8i64_as_i256: +; CHECK: # %bb.0: +; CHECK-NEXT: lui a0, %hi(.LCPI31_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0) ; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; CHECK-NEXT: vle16.v v16, (a0) ; CHECK-NEXT: vrgatherei16.vv v12, v8, v16 @@ -982,8 +1015,8 @@ define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) { ; ; ZVKB-V-LABEL: shuffle_v8i64_as_i256: ; ZVKB-V: # %bb.0: -; ZVKB-V-NEXT: lui a0, %hi(.LCPI30_0) -; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI30_0) +; ZVKB-V-NEXT: lui a0, %hi(.LCPI31_0) +; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI31_0) ; ZVKB-V-NEXT: vsetivli zero, 8, e64, m4, ta, ma ; ZVKB-V-NEXT: vle16.v v16, (a0) ; ZVKB-V-NEXT: vrgatherei16.vv v12, v8, v16 @@ -996,8 +1029,8 @@ define <8 x i64> @shuffle_v8i64_as_i256(<8 x i64> %v) { define <8 x i64> @shuffle_v8i64_as_i256_zvl256b(<8 x i64> %v) vscale_range(4,0) { ; CHECK-LABEL: shuffle_v8i64_as_i256_zvl256b: ; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI31_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI31_0) +; CHECK-NEXT: lui a0, %hi(.LCPI32_0) +; CHECK-NEXT: addi a0, a0, %lo(.LCPI32_0) ; CHECK-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; CHECK-NEXT: vle16.v v12, (a0) ; CHECK-NEXT: vsetvli a0, zero, e64, m1, ta, ma @@ -1008,8 +1041,8 @@ define <8 x i64> @shuffle_v8i64_as_i256_zvl256b(<8 x i64> %v) vscale_range(4,0) ; ; ZVKB-V-LABEL: shuffle_v8i64_as_i256_zvl256b: ; ZVKB-V: # %bb.0: -; ZVKB-V-NEXT: lui a0, %hi(.LCPI31_0) -; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI31_0) +; ZVKB-V-NEXT: lui a0, %hi(.LCPI32_0) +; ZVKB-V-NEXT: addi a0, a0, %lo(.LCPI32_0) ; ZVKB-V-NEXT: vsetivli zero, 8, e16, mf2, ta, ma ; ZVKB-V-NEXT: vle16.v v12, (a0) ; ZVKB-V-NEXT: vsetvli a0, zero, e64, m1, ta, ma From 842f79815d06459441ed410bc18d01477910ce77 Mon Sep 17 00:00:00 2001 
From: Philip Reames Date: Tue, 11 Feb 2025 13:41:04 -0800 Subject: [PATCH 2/2] Address review comment --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index ec74523a31227..c93e869e2f827 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -5342,12 +5342,10 @@ static bool isLocalRepeatingShuffle(ArrayRef<int> Mask, int Span) { for (auto [I, M] : enumerate(Mask)) { if (M == -1) continue; - int SpanIdx = I % Span; - int ChunkLo = I - SpanIdx; - int ChunkHi = ChunkLo + Span; - if (M < ChunkLo || M >= ChunkHi) + if ((M / Span) != (int)(I / Span)) return false; - int Expected = M - ChunkLo; + int SpanIdx = I % Span; + int Expected = M % Span; if (LowSpan[SpanIdx] == -1) LowSpan[SpanIdx] = Expected; if (LowSpan[SpanIdx] != Expected)