[SDAG] Teach FoldConstantArithmetic to match splats inserted into vectors

MacDue · MacDue · commit addf8789c345 · 2025-10-20T09:54:14.000Z
This teaches FoldConstantArithmetic to match `insert_subvector undef, (splat X), N2`
as a splat of X. This pattern can occur for scalable vectors when a
fixed-length splat is inserted into an undef vector.

This allows the cases in `fixed-subvector-insert-into-scalable.ll` to be
constant-folded (where previously they would all be computed at runtime).
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7338,16 +7338,23 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
            Op.getValueType().getVectorElementCount() == NumElts;
   };
 
-  auto IsBuildVectorSplatVectorOrUndef = [](const SDValue &Op) {
+  // UNDEF: folds to undef
+  // BUILD_VECTOR: may have constant elements
+  // SPLAT_VECTOR: could be a splat of a constant
+  // INSERT_SUBVECTOR: could be inserting a constant splat into an undef vector
+  // - This pattern occurs when a fixed-length vector splat is inserted into
+  //   a scalable vector
+  auto VectorOpMayConstantFold = [](const SDValue &Op) {
     return Op.isUndef() || Op.getOpcode() == ISD::CONDCODE ||
            Op.getOpcode() == ISD::BUILD_VECTOR ||
-           Op.getOpcode() == ISD::SPLAT_VECTOR;
+           Op.getOpcode() == ISD::SPLAT_VECTOR ||
+           Op.getOpcode() == ISD::INSERT_SUBVECTOR;
   };
 
   // All operands must be vector types with the same number of elements as
   // the result type and must be either UNDEF or a build/splat vector
   // or UNDEF scalars.
-  if (!llvm::all_of(Ops, IsBuildVectorSplatVectorOrUndef) ||
+  if (!llvm::all_of(Ops, VectorOpMayConstantFold) ||
       !llvm::all_of(Ops, IsScalarOrSameVectorSize))
     return SDValue();
 
@@ -7374,22 +7381,39 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
   // a combination of BUILD_VECTOR and SPLAT_VECTOR.
   unsigned NumVectorElts = NumElts.isScalable() ? 1 : NumElts.getFixedValue();
 
+  // Preprocess insert_subvector to avoid repeatedly matching the splat.
+  SmallVector<SDValue, 4> PreprocessedOps;
+  for (SDValue Op : Ops) {
+    if (Op.getOpcode() == ISD::INSERT_SUBVECTOR) {
+      // match: `insert_subvector undef, (splat X), N2` as `splat X`
+      SDValue N0 = Op.getOperand(0);
+      auto* BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1));
+      if (!N0.isUndef() || !BV || !(Op = BV->getSplatValue()))
+          return SDValue();
+    }
+    PreprocessedOps.push_back(Op);
+  }
+
   // Constant fold each scalar lane separately.
   SmallVector<SDValue, 4> ScalarResults;
   for (unsigned I = 0; I != NumVectorElts; I++) {
     SmallVector<SDValue, 4> ScalarOps;
-    for (SDValue Op : Ops) {
+    for (SDValue Op : PreprocessedOps) {
       EVT InSVT = Op.getValueType().getScalarType();
       if (Op.getOpcode() != ISD::BUILD_VECTOR &&
-          Op.getOpcode() != ISD::SPLAT_VECTOR) {
+          Op.getOpcode() != ISD::SPLAT_VECTOR &&
+          Op.getOpcode() != ISD::INSERT_SUBVECTOR) {
         if (Op.isUndef())
           ScalarOps.push_back(getUNDEF(InSVT));
         else
           ScalarOps.push_back(Op);
         continue;
       }
 
-      SDValue ScalarOp =
+      // insert_subvector has been preprocessed, so if it was of the form
+      // `insert_subvector undef, (splat X), N2`, it has been replaced with the
+      // splat value (X).
+      SDValue ScalarOp = Op.getOpcode() == ISD::INSERT_SUBVECTOR ? Op :
           Op.getOperand(Op.getOpcode() == ISD::SPLAT_VECTOR ? 0 : I);
       EVT ScalarVT = ScalarOp.getValueType();
 
diff --git a/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll b/llvm/test/CodeGen/AArch64/fixed-subvector-insert-into-scalable.ll
@@ -4,13 +4,7 @@
 define <vscale x 4 x i32> @insert_div() {
 ; CHECK-LABEL: insert_div:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    mov w8, #43691 // =0xaaab
-; CHECK-NEXT:    movi v0.4s, #9
-; CHECK-NEXT:    ptrue p0.s
-; CHECK-NEXT:    movk w8, #43690, lsl #16
-; CHECK-NEXT:    mov z1.s, w8
-; CHECK-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT:    lsr z0.s, z0.s, #1
+; CHECK-NEXT:    mov z0.s, #3 // =0x3
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 9), i64 0)
@@ -21,8 +15,7 @@ entry:
 define <vscale x 4 x i32> @insert_mul() {
 ; CHECK-LABEL: insert_mul:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v0.4s, #1
-; CHECK-NEXT:    mul z0.s, z0.s, #7
+; CHECK-NEXT:    mov z0.s, #7 // =0x7
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 1), i64 0)
@@ -33,8 +26,7 @@ entry:
 define <vscale x 4 x i32> @insert_add() {
 ; CHECK-LABEL: insert_add:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v0.4s, #5
-; CHECK-NEXT:    add z0.s, z0.s, #11 // =0xb
+; CHECK-NEXT:    mov z0.s, #16 // =0x10
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 5), i64 0)
@@ -45,8 +37,7 @@ entry:
 define <vscale x 4 x i32> @insert_sub() {
 ; CHECK-LABEL: insert_sub:
 ; CHECK:       // %bb.0: // %entry
-; CHECK-NEXT:    movi v0.4s, #11
-; CHECK-NEXT:    sub z0.s, z0.s, #11 // =0xb
+; CHECK-NEXT:    movi v0.2d, #0000000000000000
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> splat (i32 11), i64 0)