diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index bf822eb2c6eeb..ad5bec48449f9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -5130,6 +5130,37 @@ static SDValue lowerVECTOR_SHUFFLEAsRotate(ShuffleVectorSDNode *SVN,
   return DAG.getBitcast(VT, Rotate);
 }
 
+// e.g.,
+// t10 = insert_subvector undef:v16i32, t4, Constant:i64<0>
+// vector_shuffle<0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3> t10, undef:v16i32
+// ->
+// concat_vectors t4, t4, t4, t4
+static SDValue
+lowerVECTOR_SHUFFLEAsCONCAT_VECTORS(ShuffleVectorSDNode *SVN, SelectionDAG &DAG,
+                                    const RISCVSubtarget &Subtarget) {
+  assert(SVN->getOperand(1).isUndef());
+  SDValue V1 = SVN->getOperand(0);
+  // Only handle shuffles of an INSERT_SUBVECTOR placed at element 0 of an
+  // otherwise-undef vector.
+  if (V1.getOpcode() != ISD::INSERT_SUBVECTOR)
+    return SDValue();
+  if (!V1.getOperand(0).isUndef() || V1.getConstantOperandVal(2) != 0)
+    return SDValue();
+  SDValue InsertValue = V1.getOperand(1);
+  MVT SubVecVT = InsertValue.getSimpleValueType();
+  unsigned SubVecNumElements = SubVecVT.getVectorNumElements();
+  ArrayRef<int> Mask = SVN->getMask();
+  if (Mask.size() % SubVecNumElements != 0)
+    return SDValue();
+  SmallVector<int> RepeatedPattern(
+      createSequentialMask(0, SubVecNumElements, 0));
+  // Check the Mask repeatedly uses the same subvector.
+  for (unsigned I = 0; I != Mask.size(); I += SubVecNumElements)
+    if (!Mask.slice(I, SubVecNumElements).equals(RepeatedPattern))
+      return SDValue();
+  SDLoc DL(SVN);
+  SmallVector<SDValue> Ops(Mask.size() / SubVecNumElements, InsertValue);
+  return DAG.getNode(ISD::CONCAT_VECTORS, DL, SVN->getSimpleValueType(0), Ops);
+}
+
 // If compiling with an exactly known VLEN, see if we can split a
 // shuffle on m2 or larger into a small number of m1 sized shuffles
 // which write each destination registers exactly once.
@@ -5432,6 +5463,9 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
   if (SDValue V = lowerVECTOR_SHUFFLEAsRotate(SVN, DAG, Subtarget))
     return V;
 
+  if (SDValue V = lowerVECTOR_SHUFFLEAsCONCAT_VECTORS(SVN, DAG, Subtarget))
+    return V;
+
   if (VT.getScalarSizeInBits() == 8 &&
       any_of(Mask, [&](const auto &Idx) { return Idx > 255; })) {
     // On such a vector we're unable to use i8 as the index type.
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat-vectors.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat-vectors.ll
new file mode 100644
index 0000000000000..8cee056454e12
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-concat-vectors.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple riscv64 -mattr=+v -riscv-v-vector-bits-min=512 -verify-machineinstrs < %s | FileCheck %s
+
+define <16 x float> @test1(<8 x float> %0) {
+; CHECK-LABEL: test1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v9, v8, 8
+; CHECK-NEXT:    vmv.v.v v8, v9
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <16 x float> @llvm.vector.insert.v16f32.v8f32(<16 x float> poison, <8 x float> %0, i64 0)
+  %2 = shufflevector <16 x float> %1, <16 x float> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+  ret <16 x float> %2
+}
+
+define <16 x i32> @test2(<4 x i32> %0) {
+; CHECK-LABEL: test2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vmv1r.v v9, v8
+; CHECK-NEXT:    vsetivli zero, 8, e32, mf2, ta, ma
+; CHECK-NEXT:    vslideup.vi v9, v8, 4
+; CHECK-NEXT:    vmv1r.v v8, v9
+; CHECK-NEXT:    vsetivli zero, 16, e32, m1, ta, ma
+; CHECK-NEXT:    vslideup.vi v8, v9, 8
+; CHECK-NEXT:    ret
+entry:
+  %1 = call <16 x i32> @llvm.vector.insert.v16i32.v4i32(<16 x i32> poison, <4 x i32> %0, i64 0)
+  %2 = shufflevector <16 x i32> %1, <16 x i32> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+  ret <16 x i32> %2
+}