Skip to content

Commit b4b369a

Browse files
[LLVM][InstCombine][AArch64] sve.dup(V, all_active, S) ==> splat(S) (#170292)
Also refactors the rest of instCombineSVEDup to simplify the code.
1 parent 0458fe5 commit b4b369a

File tree

2 files changed: +45 −22 lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1891,25 +1891,23 @@ static std::optional<Instruction *> instCombineSVESel(InstCombiner &IC,
 
 static std::optional<Instruction *> instCombineSVEDup(InstCombiner &IC,
                                                       IntrinsicInst &II) {
-  IntrinsicInst *Pg = dyn_cast<IntrinsicInst>(II.getArgOperand(1));
-  if (!Pg)
-    return std::nullopt;
+  Value *Pg = II.getOperand(1);
 
-  if (Pg->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
-    return std::nullopt;
+  // sve.dup(V, all_active, X) ==> splat(X)
+  if (isAllActivePredicate(Pg)) {
+    auto *RetTy = cast<ScalableVectorType>(II.getType());
+    Value *Splat = IC.Builder.CreateVectorSplat(RetTy->getElementCount(),
+                                                II.getArgOperand(2));
+    return IC.replaceInstUsesWith(II, Splat);
+  }
 
-  const auto PTruePattern =
-      cast<ConstantInt>(Pg->getOperand(0))->getZExtValue();
-  if (PTruePattern != AArch64SVEPredPattern::vl1)
+  if (!match(Pg, m_Intrinsic<Intrinsic::aarch64_sve_ptrue>(
+                     m_SpecificInt(AArch64SVEPredPattern::vl1))))
     return std::nullopt;
 
-  // The intrinsic is inserting into lane zero so use an insert instead.
-  auto *IdxTy = Type::getInt64Ty(II.getContext());
-  auto *Insert = InsertElementInst::Create(
-      II.getArgOperand(0), II.getArgOperand(2), ConstantInt::get(IdxTy, 0));
-  Insert->insertBefore(II.getIterator());
-  Insert->takeName(&II);
-
+  // sve.dup(V, sve.ptrue(vl1), X) ==> insertelement V, X, 0
+  Value *Insert = IC.Builder.CreateInsertElement(
+      II.getArgOperand(0), II.getArgOperand(2), uint64_t(0));
   return IC.replaceInstUsesWith(II, Insert);
 }
 

llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-opts-dup.ll

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,15 +55,40 @@ define <vscale x 8 x i16> @dupx_splat_convert(i16 %s) #0 {
   ret <vscale x 8 x i16> %splat
 }
 
-declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.x.nxv8i16(i16)
+define <vscale x 16 x i8> @dup_all_active_i8(<vscale x 16 x i8> %v, i8 %s) #0 {
+; CHECK-LABEL: @dup_all_active_i8(
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[S:%.*]], i64 0
+; CHECK-NEXT:    [[INSERT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 16 x i8> [[INSERT]]
+;
+  %insert = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %v, <vscale x 16 x i1> splat(i1 true), i8 %s)
+  ret <vscale x 16 x i8> %insert
+}
 
-declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
-declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
+define <vscale x 4 x i32> @dup_all_active_i32(<vscale x 4 x i32> %v) #0 {
+; CHECK-LABEL: @dup_all_active_i32(
+; CHECK-NEXT:    ret <vscale x 4 x i32> splat (i32 73)
+;
+  %insert = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i1> splat(i1 true), i32 73)
+  ret <vscale x 4 x i32> %insert
+}
 
-declare <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.nxv16i1(i32)
-declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
+define <vscale x 4 x float> @dup_all_active_f32(<vscale x 4 x float> %v, float %s) #0 {
+; CHECK-LABEL: @dup_all_active_f32(
+; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x float> poison, float [[S:%.*]], i64 0
+; CHECK-NEXT:    [[INSERT:%.*]] = shufflevector <vscale x 4 x float> [[DOTSPLATINSERT]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
+; CHECK-NEXT:    ret <vscale x 4 x float> [[INSERT]]
+;
+  %insert = tail call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> %v, <vscale x 4 x i1> splat(i1 true), float %s)
+  ret <vscale x 4 x float> %insert
+}
 
-declare <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1>)
-declare <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1>)
+define <vscale x 2 x double> @dup_all_active_f64(<vscale x 2 x double> %v) #0 {
+; CHECK-LABEL: @dup_all_active_f64(
+; CHECK-NEXT:    ret <vscale x 2 x double> splat (double 1.000000e+00)
+;
+  %insert = tail call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> %v, <vscale x 2 x i1> splat(i1 true), double 1.0)
+  ret <vscale x 2 x double> %insert
+}
 
 
 attributes #0 = { "target-features"="+sve" }

0 commit comments

Comments
 (0)