Skip to content

Commit b4b369a

Browse files
[LLVM][InstCombine][AArch64] sve.dup(V, all_active, S) ==> splat(S) (#170292)
Also refactors the rest of instCombineSVEDup to simplify the code.
1 parent 0458fe5 commit b4b369a

File tree

2 files changed: +45 −22 lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1891,25 +1891,23 @@ static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
 
 static std::optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
                                                       IntrinsicInst &II) {
-  IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
-  if (!Pg)
-    return std::nullopt;
+  Value *Pg = II.getOperand(1);
 
-  if (Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
-    return std::nullopt;
+  // sve.dup(V, all_active, X) ==> splat(X)
+  if (isAllActivePredicate(Pg)) {
+    auto *RetTy = cast<ScalableVectorType>(II.getType());
+    Value *Splat = IC.Builder.CreateVectorSplat(RetTy->getElementCount(),
+                                                II.getArgOperand(2));
+    return IC.replaceInstUsesWith(II, Splat);
+  }
 
-  const auto PTruePattern =
-      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
-  if (PTruePattern != AArch64SVEPredPattern::vl1)
+  if (!match(Pg, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                     m_SpecificInt(AArch64SVEPredPattern::vl1))))
     return std::nullopt;
 
-  // The intrinsic is inserting into lane zero so use an insert instead.
-  auto *IdxTy = Type::getInt64Ty(II.getContext());
-  auto *Insert = InsertElementInst::Create(
-      II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
-  Insert->insertBefore(II.getIterator());
-  Insert->takeName(&II);
-
+  // sve.dup(V, sve.ptrue(vl1), X) ==> insertelement V, X, 0
+  Value *Insert = IC.Builder.CreateInsertElement(
+      II.getArgOperand(0), II.getArgOperand(2), uint64_t(0));
   return IC.replaceInstUsesWith(II, Insert);
 }
 

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,40 @@ define <vscale x 8 x i16> @dupx_splat_convert(i16 %s) #0 {
   ret <vscale x 8 x i16> %splat
 }
 
-declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
+define <vscale x 16 x i8> @dup_all_active_i8(<vscale x 16 x i8> %v, i8 %s) #0 {
+; CHECK-LABEL: @dup_all_active_i8(
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[S:%.*]], i64 0
+; CHECK-NEXT:    [[INSERT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[INSERT]]
+;
+  %insert = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %v, <vscale x 16 x i1> splat(i1 true), i8 %s)
+  ret <vscale x 16 x i8> %insert
+}
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
+define <vscale x 4 x i32> @dup_all_active_i32(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: @dup_all_active_i32(
+; CHECK-NEXT:    ret <vscale x 4 x i32> splat (i32 73)
+;
+  %insert = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i1> splat(i1 true), i32 73)
+  ret <vscale x 4 x i32> %insert
+}
 
-declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
-declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+define <vscale x 4 x float> @dup_all_active_f32(<vscale x 4 x float> %v, float %s) #0 {
+; CHECK-LABEL: @dup_all_active_f32(
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[S:%.*]], i64 0
+; CHECK-NEXT:    [[INSERT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 4 x float> [[INSERT]]
+;
+  %insert = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> %v, <vscale x 4 x i1> splat(i1 true), float %s)
+  ret <vscale x 4 x float> %insert
+}
 
-declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
-declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+define <vscale x 2 x double> @dup_all_active_f64(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: @dup_all_active_f64(
+; CHECK-NEXT:    ret <vscale x 2 x double> splat (double 1.000000e+00)
+;
+  %insert = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> %v, <vscale x 2 x i1> splat(i1 true), double 1.0)
+  ret <vscale x 2 x double> %insert
+}
 
 
 attributes #0 = { "target-features"="+sve" }

0 commit comments

Comments
 (0)