llvm · HanKuanChen · Sep 25, 2024 · Sep 25, 2024 · Sep 25, 2024 · Sep 25, 2024
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5130,6 +5130,40 @@ static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
   return DAG.getBitcast(VT, Rotate);
 }
 
+// e.g.,
+// t10 = insert_subvector undef:v16i32, t4, Constant:i64<0>
+// vector_shuffle<0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3> t10, undef:v16i32
+// ->
+// concat_vectors t4, t4, t4, t4
+static SDValue
+lowerVECTOR_SHUFFLEAsCONCAT_VECTORS(ShuffleVectorSDNode *SVN, SelectionDAG &DAG,
+                                    const RISCVSubtarget &Subtarget) {
+  assert(SVN->getOperand(1).isUndef());
+  SDValue V1 = SVN->getOperand(0);
+  if (V1.getOpcode() != ISD::INSERT_SUBVECTOR)
+    return SDValue();
+  if (!V1.getOperand(0).isUndef())
+    return SDValue();
+  SDValue InsertValue = V1.getOperand(1);
+  MVT SubVecVT = InsertValue.getSimpleValueType();
+  unsigned SubVecNumElements = SubVecVT.getVectorNumElements();
+  uint64_t InsertIndex = cast<ConstantSDNode>(V1.getOperand(2))->getZExtValue();
+  if (InsertIndex != 0)
+    return SDValue();
+  ArrayRef<int> Mask = SVN->getMask();
+  if (Mask.size() % SubVecNumElements != 0)
+    return SDValue();
+  SmallVector<int> RepeatedPattern(
+      createSequentialMask(0, SubVecNumElements, 0));
+  // Check the Mask repeatedly uses the same subvector.
+  for (unsigned I = 0; I != Mask.size(); I += SubVecNumElements)
+    if (!Mask.slice(I, SubVecNumElements).equals(RepeatedPattern))
+      return SDValue();
+  SDLoc DL(SVN);
+  SmallVector<SDValue> Ops(Mask.size() / SubVecNumElements, InsertValue);
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, SVN->getSimpleValueType(0), Ops);
+}
+
 // If compiling with an exactly known VLEN, see if we can split a
 // shuffle on m2 or larger into a small number of m1 sized shuffles
 // which write each destination registers exactly once.
@@ -5432,6 +5466,9 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
     if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
       return V;
 
+    if (SDValue V = lowerVECTOR_SHUFFLEAsCONCAT_VECTORS(SVN, DAG, Subtarget))
+      return V;
+
     if (VT.getScalarSizeInBits() == 8 &&
         any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
       // On such a vector we're unable to use i8 as the index type.

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat-vectors.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s
+
+define <16 x float> @test1(<8 x float> %0) {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v9, v8, 8
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> poison, i64 8)
+  %2 = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> %1, <8 x float> %0, i64 0)
+  %3 = shufflevector <16 x float> %2, <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <16 x float> %3
+}
+
+define <16 x i32> @test2(<4 x i32> %0) {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsetivli zero, 8, e32, mf2, ta, ma
+; CHECK-NEXT:    vslideup.vi v9, v8, 4
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 8
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> poison, i64 4)
+  %2 = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> %1, <4 x i32> poison, i64 8)
+  %3 = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> %2, <4 x i32> poison, i64 12)
+  %4 = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> %3, <4 x i32> %0, i64 0)
+  %5 = shufflevector <16 x i32> %4, <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  ret <16 x i32> %5
+}