-
Notifications
You must be signed in to change notification settings - Fork 15.5k
[SLP]Enable float point math ops as copyables elements. #169857
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[SLP]Enable float point math ops as copyables elements. #169857
Conversation
Created using spr 1.3.7
|
@llvm/pr-subscribers-llvm-ir @llvm/pr-subscribers-llvm-transforms Author: Alexey Bataev (alexey-bataev) ChangesPatch enables support for float point math operations as base Patch is 79.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169857.diff 16 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3b36ccbd677dc..c3f8689e62d53 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5325,13 +5325,14 @@ class slpvectorizer::BoUpSLP {
if (ScheduleCopyableDataMap.empty())
return false;
SmallDenseMap<TreeEntry *, unsigned> PotentiallyReorderedEntriesCount;
- SmallDenseMap<const TreeEntry *, unsigned> OrderedEntriesCount;
+ ArrayRef<TreeEntry *> Entries = SLP.getTreeEntries(User);
+ if (Entries.empty())
+ return false;
+ unsigned CurNumOps = 0;
for (const Use &U : User->operands()) {
if (U.get() != Op)
continue;
- ArrayRef<TreeEntry *> Entries = SLP.getTreeEntries(User);
- if (Entries.empty())
- return false;
+ ++CurNumOps;
// Check all tree entries, if they have operands replaced by copyable
// data.
for (TreeEntry *TE : Entries) {
@@ -5363,28 +5364,28 @@ class slpvectorizer::BoUpSLP {
// reordered.
// Same applies even for non-commutative cmps, because we can invert
// their predicate potentially and, thus, reorder the operands.
+ constexpr unsigned NumCommutativeOps = 2;
bool IsCommutativeUser =
- ::isCommutative(User) ||
- ::isCommutative(TE->getMatchingMainOpOrAltOp(User), User);
- if (!IsCommutativeUser && !isa<CmpInst>(User)) {
- unsigned &OpCnt =
- OrderedEntriesCount.try_emplace(TE, 0).first->getSecond();
+ U.getOperandNo() < NumCommutativeOps &&
+ (::isCommutative(User) ||
+ ::isCommutative(TE->getMatchingMainOpOrAltOp(User), User));
+ bool IsCommutativeWithSameOps =
+ IsCommutativeUser &&
+ User->getNumOperands() >= NumCommutativeOps &&
+ User->getOperand(0) == User->getOperand(1);
+ if ((!IsCommutativeUser || IsCommutativeWithSameOps) &&
+ !isa<CmpInst>(User)) {
EdgeInfo EI(TE, U.getOperandNo());
- if (!getScheduleCopyableData(EI, Op))
+ if (CurNumOps != NumOps || getScheduleCopyableData(EI, Op))
continue;
- // Found copyable operand - continue.
- OpCnt += Inc;
- continue;
+ return false;
}
PotentiallyReorderedEntriesCount.try_emplace(TE, 0)
.first->getSecond() += Inc;
}
}
if (PotentiallyReorderedEntriesCount.empty())
- return all_of(OrderedEntriesCount,
- [&](const std::pair<const TreeEntry *, unsigned> &P) {
- return P.second == NumOps;
- });
+ return true;
// Check the commutative/cmp entries.
for (auto &P : PotentiallyReorderedEntriesCount) {
SmallPtrSet<Value *, 4> ParentsUniqueUsers;
@@ -5430,10 +5431,6 @@ class slpvectorizer::BoUpSLP {
return all_of(PotentiallyReorderedEntriesCount,
[&](const std::pair<const TreeEntry *, unsigned> &P) {
return P.second == NumOps - 1;
- }) &&
- all_of(OrderedEntriesCount,
- [&](const std::pair<const TreeEntry *, unsigned> &P) {
- return P.second == NumOps;
});
}
@@ -5655,6 +5652,7 @@ class slpvectorizer::BoUpSLP {
}
};
+ SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>> Checked;
for (ScheduleBundle *Bundle : Bundles) {
if (ScheduleCopyableDataMap.empty() && TotalOpCount == 0)
break;
@@ -5662,7 +5660,6 @@ class slpvectorizer::BoUpSLP {
// Need to search for the lane since the tree entry can be
// reordered.
auto *It = find(Bundle->getTreeEntry()->Scalars, In);
- SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>> Checked;
bool IsNonSchedulableWithParentPhiNode =
Bundle->getTreeEntry()->doesNotNeedToSchedule() &&
Bundle->getTreeEntry()->UserTreeIndex &&
@@ -10865,7 +10862,9 @@ class InstructionsCompatibilityAnalysis {
Opcode == Instruction::LShr || Opcode == Instruction::Shl ||
Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
Opcode == Instruction::And || Opcode == Instruction::Or ||
- Opcode == Instruction::Xor;
+ Opcode == Instruction::Xor || Opcode == Instruction::FAdd ||
+ Opcode == Instruction::FSub || Opcode == Instruction::FMul ||
+ Opcode == Instruction::FDiv;
}
/// Identifies the best candidate value, which represents main opcode
@@ -11198,6 +11197,10 @@ class InstructionsCompatibilityAnalysis {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ case Instruction::FSub:
+ case Instruction::FDiv:
VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
break;
default:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll
index 0783a28f56d85..961662c664a31 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll
@@ -11,10 +11,10 @@ define void @p(double %0) {
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <2 x i32> <i32 1, i32 7>
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> zeroinitializer, [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> <double 1.000000e+00, double 1.000000e+00, double poison, double poison>, <4 x double> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP10:%.*]] = fmul <4 x double> zeroinitializer, [[TMP9]]
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison>, <4 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x double> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = fptosi <4 x double> [[TMP12]] to <4 x i32>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll b/llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll
index 0cc4d3db5c537..1abc16da77c8e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll
@@ -4,15 +4,16 @@
define void @test() {
; CHECK-LABEL: define void @test() {
; CHECK-NEXT: [[BB:.*]]:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00>, <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00>, <4 x float> <float poison, float 0.000000e+00, float poison, float poison>, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT: br label %[[BB1:.*]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP9:%.*]], %[[BB1]] ]
-; CHECK-NEXT: [[FMUL:%.*]] = fmul float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[FMUL:%.*]] = sitofp i32 0 to float
+; CHECK-NEXT: [[SITOFP:%.*]] = sitofp i32 0 to float
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> <float poison, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[SITOFP]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, <4 x i32> <i32 0, i32 0, i32 poison, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[FMUL]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
-; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP0]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
index d13a8578d1e00..c1cc3f2dfc9e5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
@@ -7,36 +7,30 @@
define void @main(i1 %arg) {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 %arg, label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
+; CHECK-NEXT: br i1 [[ARG:%.*]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
; CHECK: cond.true:
; CHECK-NEXT: unreachable
; CHECK: cond.end:
; CHECK-NEXT: br label [[INVOKE_CONT:%.*]]
; CHECK: invoke.cont:
-; CHECK-NEXT: br i1 %arg, label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]]
+; CHECK-NEXT: br i1 [[ARG]], label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]]
; CHECK: arrayctor.cont:
; CHECK-NEXT: [[AGG_TMP101211_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0
; CHECK-NEXT: br label [[FOR_COND36_PREHEADER:%.*]]
; CHECK: for.cond36.preheader:
-; CHECK-NEXT: br i1 %arg, label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]]
+; CHECK-NEXT: br i1 [[ARG]], label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]]
; CHECK: cond.false51.us:
; CHECK-NEXT: unreachable
; CHECK: cond.true48.us:
-; CHECK-NEXT: br i1 %arg, label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]]
+; CHECK-NEXT: br i1 [[ARG]], label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]]
; CHECK: cond.false66.us:
-; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, 0x3EB0C6F7A0B5ED8D
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> <double poison, double 0xBFA5CC2D1960285F>, double [[ADD_I276_US]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> <double 0.000000e+00, double 1.000000e-01>, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.400000e+02)
-; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], <double 5.000000e+01, double 5.200000e+01>
-; CHECK-NEXT: store <2 x double> [[TMP3]], ptr undef, align 8
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> <double 2.000000e-01, double 3.000000e-01>, [[TMP1]]
-; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8
+; CHECK-NEXT: store <2 x double> <double 0x404900049667B5F2, double 0x404E0515D587DA7B>, ptr undef, align 8
+; CHECK-NEXT: store <2 x double> <double 2.000000e-07, double 0x3F91A436DC4B6CE6>, ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8
; CHECK-NEXT: ret void
; CHECK: cond.true63.us:
; CHECK-NEXT: unreachable
; CHECK: for.body42.lr.ph.us:
-; CHECK-NEXT: br i1 %arg, label [[COND_TRUE48_US:%.*]], label [[COND_FALSE51_US:%.*]]
+; CHECK-NEXT: br i1 [[ARG]], label [[COND_TRUE48_US:%.*]], label [[COND_FALSE51_US:%.*]]
; CHECK: _Z5clampd.exit.1:
; CHECK-NEXT: br label [[FOR_COND36_PREHEADER]]
;
@@ -96,7 +90,7 @@ _Z5clampd.exit.1:
define void @test(i1 %arg) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 %arg, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
+; CHECK-NEXT: br i1 [[ARG:%.*]], label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
; CHECK: if.then38:
; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0
; CHECK-NEXT: store <2 x double> <double 0x3FFA356C1D8A7F76, double 0x3FFDC4F38B38BEF4>, ptr [[AGG_TMP74663_SROA_0_0_IDX]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
index 6d713e83bbf4e..ca65ff88a4b81 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
@@ -9,33 +9,38 @@ define void @test(ptr %nExp, float %0, i1 %cmp, float %1) {
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
; CHECK: [[IF_THEN]]:
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 3, i32 3>
-; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[NEXP]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[DIV_2_I_I:%.*]] = fmul float [[TMP0]], 0.000000e+00
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP1]], i32 3
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP20]], <4 x i32> <i32 5, i32 1, i32 2, i32 3>
; CHECK-NEXT: br label %[[IF_END]]
; CHECK: [[IF_END]]:
-; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x float> [ [[TMP11]], %[[IF_THEN]] ], [ [[TMP3]], %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x float> [ [[TMP8]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x float> [ zeroinitializer, %[[IF_THEN]] ], [ <float 0x7FF8000000000000, float 1.000000e+00>, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x float> [ [[TMP7]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP14]], <2 x float> <float poison, float 0.000000e+00>, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = phi float [ 0.000000e+00, %[[IF_THEN]] ], [ 0x7FF8000000000000, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = phi float [ 0.000000e+00, %[[IF_THEN]] ], [ 1.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: [[FA_SROA_9_0:%.*]] = phi float [ [[DIV_2_I_I]], %[[IF_THEN]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x float> [ [[TMP10]], %[[IF_THEN]] ], [ [[TMP3]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x float> [ [[TMP7]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = fmul <4 x float> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP22]], float [[FA_SROA_9_0]], i32 1
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP28]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = fmul <2 x float> [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP22]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP11]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x float> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP19:%.*]] = fmul <4 x float> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = fadd <2 x float> [[TMP17]], [[TMP18]]
; CHECK-NEXT: [[CALL25:%.*]] = load volatile ptr, ptr null, align 8
-; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x float> [[TMP18]], [[TMP17]]
-; CHECK-NEXT: [[TMP21:%.*]] = fmul <2 x float> [[TMP20]], zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = fadd <2 x float> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x float> [[TMP29]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x float> <float 1.000000e+00, float 1.000000e+00, float poison, float poison>, <4 x float> [[TMP30]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP32:%.*]] = fmul <4 x float> <float -0.000000e+00, float -0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP31]]
+; CHECK-NEXT: [[TMP26:%.*]] = fadd <4 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP32]]
; CHECK-NEXT: [[TMP23:%.*]] = fmul <4 x float> [[TMP19]], zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x float> [[TMP19]], zeroinitializer
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> [[TMP24]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x float> [[TMP22]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float 1.000000e+00, float poison, float poison>, <4 x float> [[TMP28]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP27:%.*]] = fadd <4 x float> [[TMP25]], [[TMP26]]
; CHECK-NEXT: store <4 x float> [[TMP27]], ptr [[CALL25]], align 4
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
index 6942df532ae29..91ec61b275205 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
@@ -25,8 +25,7 @@ define void @foo(double %i) {
; CHECK-NEXT: [[TMP20:%.*]] = fmul double 0.000000e+00, [[I82]]
; CHECK-NEXT: [[I118:%.*]] = fadd double [[TMP19]], [[TMP20]]
; CHECK-NEXT: [[TMP21:%.*]] = fmul <4 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison>, double [[I82]], i32 3
-; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[TMP21]], [[TMP23]]
+; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[TMP21]], <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison>
; CHECK-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[TMP24]], zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP25]]
; CHECK-NEXT: [[TMP27:%.*]] = fmul <4 x double> [[TMP26]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent.ll
index a07e617384e09..fd7f0c61b6737 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent...
[truncated]
|
|
@llvm/pr-subscribers-vectorizers Author: Alexey Bataev (alexey-bataev) ChangesPatch enables support for float point math operations as base Patch is 79.56 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/169857.diff 16 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 3b36ccbd677dc..c3f8689e62d53 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -5325,13 +5325,14 @@ class slpvectorizer::BoUpSLP {
if (ScheduleCopyableDataMap.empty())
return false;
SmallDenseMap<TreeEntry *, unsigned> PotentiallyReorderedEntriesCount;
- SmallDenseMap<const TreeEntry *, unsigned> OrderedEntriesCount;
+ ArrayRef<TreeEntry *> Entries = SLP.getTreeEntries(User);
+ if (Entries.empty())
+ return false;
+ unsigned CurNumOps = 0;
for (const Use &U : User->operands()) {
if (U.get() != Op)
continue;
- ArrayRef<TreeEntry *> Entries = SLP.getTreeEntries(User);
- if (Entries.empty())
- return false;
+ ++CurNumOps;
// Check all tree entries, if they have operands replaced by copyable
// data.
for (TreeEntry *TE : Entries) {
@@ -5363,28 +5364,28 @@ class slpvectorizer::BoUpSLP {
// reordered.
// Same applies even for non-commutative cmps, because we can invert
// their predicate potentially and, thus, reorder the operands.
+ constexpr unsigned NumCommutativeOps = 2;
bool IsCommutativeUser =
- ::isCommutative(User) ||
- ::isCommutative(TE->getMatchingMainOpOrAltOp(User), User);
- if (!IsCommutativeUser && !isa<CmpInst>(User)) {
- unsigned &OpCnt =
- OrderedEntriesCount.try_emplace(TE, 0).first->getSecond();
+ U.getOperandNo() < NumCommutativeOps &&
+ (::isCommutative(User) ||
+ ::isCommutative(TE->getMatchingMainOpOrAltOp(User), User));
+ bool IsCommutativeWithSameOps =
+ IsCommutativeUser &&
+ User->getNumOperands() >= NumCommutativeOps &&
+ User->getOperand(0) == User->getOperand(1);
+ if ((!IsCommutativeUser || IsCommutativeWithSameOps) &&
+ !isa<CmpInst>(User)) {
EdgeInfo EI(TE, U.getOperandNo());
- if (!getScheduleCopyableData(EI, Op))
+ if (CurNumOps != NumOps || getScheduleCopyableData(EI, Op))
continue;
- // Found copyable operand - continue.
- OpCnt += Inc;
- continue;
+ return false;
}
PotentiallyReorderedEntriesCount.try_emplace(TE, 0)
.first->getSecond() += Inc;
}
}
if (PotentiallyReorderedEntriesCount.empty())
- return all_of(OrderedEntriesCount,
- [&](const std::pair<const TreeEntry *, unsigned> &P) {
- return P.second == NumOps;
- });
+ return true;
// Check the commutative/cmp entries.
for (auto &P : PotentiallyReorderedEntriesCount) {
SmallPtrSet<Value *, 4> ParentsUniqueUsers;
@@ -5430,10 +5431,6 @@ class slpvectorizer::BoUpSLP {
return all_of(PotentiallyReorderedEntriesCount,
[&](const std::pair<const TreeEntry *, unsigned> &P) {
return P.second == NumOps - 1;
- }) &&
- all_of(OrderedEntriesCount,
- [&](const std::pair<const TreeEntry *, unsigned> &P) {
- return P.second == NumOps;
});
}
@@ -5655,6 +5652,7 @@ class slpvectorizer::BoUpSLP {
}
};
+ SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>> Checked;
for (ScheduleBundle *Bundle : Bundles) {
if (ScheduleCopyableDataMap.empty() && TotalOpCount == 0)
break;
@@ -5662,7 +5660,6 @@ class slpvectorizer::BoUpSLP {
// Need to search for the lane since the tree entry can be
// reordered.
auto *It = find(Bundle->getTreeEntry()->Scalars, In);
- SmallDenseSet<std::pair<const ScheduleEntity *, unsigned>> Checked;
bool IsNonSchedulableWithParentPhiNode =
Bundle->getTreeEntry()->doesNotNeedToSchedule() &&
Bundle->getTreeEntry()->UserTreeIndex &&
@@ -10865,7 +10862,9 @@ class InstructionsCompatibilityAnalysis {
Opcode == Instruction::LShr || Opcode == Instruction::Shl ||
Opcode == Instruction::SDiv || Opcode == Instruction::UDiv ||
Opcode == Instruction::And || Opcode == Instruction::Or ||
- Opcode == Instruction::Xor;
+ Opcode == Instruction::Xor || Opcode == Instruction::FAdd ||
+ Opcode == Instruction::FSub || Opcode == Instruction::FMul ||
+ Opcode == Instruction::FDiv;
}
/// Identifies the best candidate value, which represents main opcode
@@ -11198,6 +11197,10 @@ class InstructionsCompatibilityAnalysis {
case Instruction::And:
case Instruction::Or:
case Instruction::Xor:
+ case Instruction::FAdd:
+ case Instruction::FMul:
+ case Instruction::FSub:
+ case Instruction::FDiv:
VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
break;
default:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll
index 0783a28f56d85..961662c664a31 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll
@@ -11,10 +11,10 @@ define void @p(double %0) {
; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], zeroinitializer
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP3]], <2 x i32> <i32 1, i32 7>
; CHECK-NEXT: [[TMP6:%.*]] = fadd <2 x double> zeroinitializer, [[TMP5]]
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x double> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x double> [[TMP6]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x double> <double 1.000000e+00, double 1.000000e+00, double poison, double poison>, <4 x double> [[TMP7]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP10:%.*]] = fmul <4 x double> zeroinitializer, [[TMP9]]
; CHECK-NEXT: [[TMP8:%.*]] = fmul <4 x double> [[TMP4]], zeroinitializer
-; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <2 x double> [[TMP7]], <2 x double> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double poison, double poison>, <4 x double> [[TMP9]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP11:%.*]] = fadd <4 x double> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = fadd <4 x double> [[TMP11]], zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = fptosi <4 x double> [[TMP12]] to <4 x i32>
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll b/llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll
index 0cc4d3db5c537..1abc16da77c8e 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll
@@ -4,15 +4,16 @@
define void @test() {
; CHECK-LABEL: define void @test() {
; CHECK-NEXT: [[BB:.*]]:
-; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00>, <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
-; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00>, <4 x float> <float poison, float 0.000000e+00, float poison, float poison>, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
; CHECK-NEXT: br label %[[BB1:.*]]
; CHECK: [[BB1]]:
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP9:%.*]], %[[BB1]] ]
-; CHECK-NEXT: [[FMUL:%.*]] = fmul float 0.000000e+00, 0.000000e+00
+; CHECK-NEXT: [[FMUL:%.*]] = sitofp i32 0 to float
+; CHECK-NEXT: [[SITOFP:%.*]] = sitofp i32 0 to float
+; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> <float poison, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[SITOFP]], i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> <float poison, float poison, float poison, float 0.000000e+00>, <4 x i32> <i32 0, i32 0, i32 poison, i32 7>
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float [[FMUL]], i32 2
-; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
-; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP0]], [[TMP3]]
+; CHECK-NEXT: [[TMP4:%.*]] = fadd <4 x float> [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = fadd <4 x float> [[TMP4]], zeroinitializer
; CHECK-NEXT: [[TMP6:%.*]] = fcmp ogt <4 x float> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
index d13a8578d1e00..c1cc3f2dfc9e5 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll
@@ -7,36 +7,30 @@
define void @main(i1 %arg) {
; CHECK-LABEL: @main(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 %arg, label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
+; CHECK-NEXT: br i1 [[ARG:%.*]], label [[COND_TRUE:%.*]], label [[COND_END:%.*]]
; CHECK: cond.true:
; CHECK-NEXT: unreachable
; CHECK: cond.end:
; CHECK-NEXT: br label [[INVOKE_CONT:%.*]]
; CHECK: invoke.cont:
-; CHECK-NEXT: br i1 %arg, label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]]
+; CHECK-NEXT: br i1 [[ARG]], label [[ARRAYCTOR_CONT:%.*]], label [[INVOKE_CONT]]
; CHECK: arrayctor.cont:
; CHECK-NEXT: [[AGG_TMP101211_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0
; CHECK-NEXT: br label [[FOR_COND36_PREHEADER:%.*]]
; CHECK: for.cond36.preheader:
-; CHECK-NEXT: br i1 %arg, label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]]
+; CHECK-NEXT: br i1 [[ARG]], label [[FOR_BODY42_LR_PH_US:%.*]], label [[_Z5CLAMPD_EXIT_1:%.*]]
; CHECK: cond.false51.us:
; CHECK-NEXT: unreachable
; CHECK: cond.true48.us:
-; CHECK-NEXT: br i1 %arg, label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]]
+; CHECK-NEXT: br i1 [[ARG]], label [[COND_TRUE63_US:%.*]], label [[COND_FALSE66_US:%.*]]
; CHECK: cond.false66.us:
-; CHECK-NEXT: [[ADD_I276_US:%.*]] = fadd double 0.000000e+00, 0x3EB0C6F7A0B5ED8D
-; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x double> <double poison, double 0xBFA5CC2D1960285F>, double [[ADD_I276_US]], i32 0
-; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x double> <double 0.000000e+00, double 1.000000e-01>, [[TMP0]]
-; CHECK-NEXT: [[TMP2:%.*]] = fmul <2 x double> [[TMP1]], splat (double 1.400000e+02)
-; CHECK-NEXT: [[TMP3:%.*]] = fadd <2 x double> [[TMP2]], <double 5.000000e+01, double 5.200000e+01>
-; CHECK-NEXT: store <2 x double> [[TMP3]], ptr undef, align 8
-; CHECK-NEXT: [[TMP4:%.*]] = fmul <2 x double> <double 2.000000e-01, double 3.000000e-01>, [[TMP1]]
-; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8
+; CHECK-NEXT: store <2 x double> <double 0x404900049667B5F2, double 0x404E0515D587DA7B>, ptr undef, align 8
+; CHECK-NEXT: store <2 x double> <double 2.000000e-07, double 0x3F91A436DC4B6CE6>, ptr [[AGG_TMP101211_SROA_0_0_IDX]], align 8
; CHECK-NEXT: ret void
; CHECK: cond.true63.us:
; CHECK-NEXT: unreachable
; CHECK: for.body42.lr.ph.us:
-; CHECK-NEXT: br i1 %arg, label [[COND_TRUE48_US:%.*]], label [[COND_FALSE51_US:%.*]]
+; CHECK-NEXT: br i1 [[ARG]], label [[COND_TRUE48_US:%.*]], label [[COND_FALSE51_US:%.*]]
; CHECK: _Z5clampd.exit.1:
; CHECK-NEXT: br label [[FOR_COND36_PREHEADER]]
;
@@ -96,7 +90,7 @@ _Z5clampd.exit.1:
define void @test(i1 %arg) {
; CHECK-LABEL: @test(
; CHECK-NEXT: entry:
-; CHECK-NEXT: br i1 %arg, label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
+; CHECK-NEXT: br i1 [[ARG:%.*]], label [[IF_THEN78:%.*]], label [[IF_THEN38:%.*]]
; CHECK: if.then38:
; CHECK-NEXT: [[AGG_TMP74663_SROA_0_0_IDX:%.*]] = getelementptr inbounds [[STRUCT_RAY:%.*]], ptr undef, i64 0, i32 1, i32 0
; CHECK-NEXT: store <2 x double> <double 0x3FFA356C1D8A7F76, double 0x3FFDC4F38B38BEF4>, ptr [[AGG_TMP74663_SROA_0_0_IDX]], align 8
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
index 6d713e83bbf4e..ca65ff88a4b81 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll
@@ -9,33 +9,38 @@ define void @test(ptr %nExp, float %0, i1 %cmp, float %1) {
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x float> [[TMP2]], float [[TMP0]], i32 3
; CHECK-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
; CHECK: [[IF_THEN]]:
-; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <2 x i32> <i32 3, i32 3>
-; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer
; CHECK-NEXT: [[TMP4:%.*]] = load float, ptr [[NEXP]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 1
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP4]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = fmul <2 x float> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[DIV_2_I_I:%.*]] = fmul float [[TMP0]], 0.000000e+00
; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> <float poison, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP1]], i32 3
-; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP8]], <2 x float> poison, <4 x i32> <i32 0, i32 poison, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP10]], <4 x i32> <i32 4, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <2 x float> [[TMP7]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x float> [[TMP9]], <4 x float> [[TMP20]], <4 x i32> <i32 5, i32 1, i32 2, i32 3>
; CHECK-NEXT: br label %[[IF_END]]
; CHECK: [[IF_END]]:
-; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x float> [ [[TMP11]], %[[IF_THEN]] ], [ [[TMP3]], %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x float> [ [[TMP8]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x float> [ zeroinitializer, %[[IF_THEN]] ], [ <float 0x7FF8000000000000, float 1.000000e+00>, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x float> [ [[TMP7]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
-; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP14]], <2 x float> <float poison, float 0.000000e+00>, <2 x i32> <i32 1, i32 3>
+; CHECK-NEXT: [[TMP11:%.*]] = phi float [ 0.000000e+00, %[[IF_THEN]] ], [ 0x7FF8000000000000, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP12:%.*]] = phi float [ 0.000000e+00, %[[IF_THEN]] ], [ 1.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: [[FA_SROA_9_0:%.*]] = phi float [ [[DIV_2_I_I]], %[[IF_THEN]] ], [ 0.000000e+00, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP21:%.*]] = phi <4 x float> [ [[TMP10]], %[[IF_THEN]] ], [ [[TMP3]], %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP22:%.*]] = phi <2 x float> [ [[TMP7]], %[[IF_THEN]] ], [ zeroinitializer, %[[ENTRY]] ]
+; CHECK-NEXT: [[TMP19:%.*]] = fmul <4 x float> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[TMP15:%.*]] = insertelement <2 x float> [[TMP22]], float [[FA_SROA_9_0]], i32 1
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x float> [[TMP28]], <2 x float> poison, <2 x i32> zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = fmul <2 x float> [[TMP15]], [[TMP16]]
+; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <2 x float> [[TMP22]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
+; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[TMP11]], i32 0
; CHECK-NEXT: [[TMP18:%.*]] = fmul <2 x float> [[TMP13]], [[TMP14]]
-; CHECK-NEXT: [[TMP19:%.*]] = fmul <4 x float> [[TMP12]], zeroinitializer
+; CHECK-NEXT: [[TMP29:%.*]] = fadd <2 x float> [[TMP17]], [[TMP18]]
; CHECK-NEXT: [[CALL25:%.*]] = load volatile ptr, ptr null, align 8
-; CHECK-NEXT: [[TMP20:%.*]] = fadd <2 x float> [[TMP18]], [[TMP17]]
-; CHECK-NEXT: [[TMP21:%.*]] = fmul <2 x float> [[TMP20]], zeroinitializer
-; CHECK-NEXT: [[TMP22:%.*]] = fadd <2 x float> [[TMP21]], zeroinitializer
+; CHECK-NEXT: [[TMP30:%.*]] = shufflevector <2 x float> [[TMP29]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
+; CHECK-NEXT: [[TMP31:%.*]] = shufflevector <4 x float> <float 1.000000e+00, float 1.000000e+00, float poison, float poison>, <4 x float> [[TMP30]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+; CHECK-NEXT: [[TMP32:%.*]] = fmul <4 x float> <float -0.000000e+00, float -0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP31]]
+; CHECK-NEXT: [[TMP26:%.*]] = fadd <4 x float> <float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP32]]
; CHECK-NEXT: [[TMP23:%.*]] = fmul <4 x float> [[TMP19]], zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x float> [[TMP19]], zeroinitializer
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <4 x float> [[TMP23]], <4 x float> [[TMP24]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
-; CHECK-NEXT: [[TMP28:%.*]] = shufflevector <2 x float> [[TMP22]], <2 x float> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
-; CHECK-NEXT: [[TMP26:%.*]] = shufflevector <4 x float> <float 0.000000e+00, float 1.000000e+00, float poison, float poison>, <4 x float> [[TMP28]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: [[TMP27:%.*]] = fadd <4 x float> [[TMP25]], [[TMP26]]
; CHECK-NEXT: store <4 x float> [[TMP27]], ptr [[CALL25]], align 4
; CHECK-NEXT: ret void
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
index 6942df532ae29..91ec61b275205 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll
@@ -25,8 +25,7 @@ define void @foo(double %i) {
; CHECK-NEXT: [[TMP20:%.*]] = fmul double 0.000000e+00, [[I82]]
; CHECK-NEXT: [[I118:%.*]] = fadd double [[TMP19]], [[TMP20]]
; CHECK-NEXT: [[TMP21:%.*]] = fmul <4 x double> zeroinitializer, [[TMP1]]
-; CHECK-NEXT: [[TMP23:%.*]] = insertelement <4 x double> <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison>, double [[I82]], i32 3
-; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[TMP21]], [[TMP23]]
+; CHECK-NEXT: [[TMP24:%.*]] = fadd <4 x double> [[TMP21]], <double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double poison>
; CHECK-NEXT: [[TMP25:%.*]] = fadd <4 x double> [[TMP24]], zeroinitializer
; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> zeroinitializer, <4 x double> zeroinitializer, <4 x double> [[TMP25]]
; CHECK-NEXT: [[TMP27:%.*]] = fmul <4 x double> [[TMP26]], zeroinitializer
diff --git a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent.ll b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent.ll
index a07e617384e09..fd7f0c61b6737 100644
--- a/llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent.ll
+++ b/llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent...
[truncated]
|
You can test this locally with the following command:git diff -U0 --pickaxe-regex -S '([^a-zA-Z0-9#_-]undef([^a-zA-Z0-9_-]|$)|UndefValue::get)' 'HEAD~1' HEAD llvm/include/llvm/IR/Instruction.h llvm/include/llvm/IR/IntrinsicInst.h llvm/lib/IR/Instruction.cpp llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp llvm/test/Transforms/SLPVectorizer/AArch64/shuffle-vectors-mask-size.ll llvm/test/Transforms/SLPVectorizer/X86/bv-root-part-of-graph.ll llvm/test/Transforms/SLPVectorizer/X86/crash_smallpt.ll llvm/test/Transforms/SLPVectorizer/X86/entry-no-bundle-but-extra-use-on-vec.ll llvm/test/Transforms/SLPVectorizer/X86/extractelement-single-use-many-nodes.ll llvm/test/Transforms/SLPVectorizer/X86/multi-node-for-copyable-parent.ll llvm/test/Transforms/SLPVectorizer/X86/multi-node-user-with-copyable-ops.ll llvm/test/Transforms/SLPVectorizer/X86/non-commutative-op-in-commutative-inst.ll llvm/test/Transforms/SLPVectorizer/X86/reused-last-instruction-in-split-node.ll llvm/test/Transforms/SLPVectorizer/X86/user-with-multi-copyable-ops.ll llvm/test/Transforms/SLPVectorizer/X86/vect_copyable_in_binops.ll llvm/test/Transforms/SLPVectorizer/alternate-non-profitable.ll llvm/test/Transforms/SLPVectorizer/crash_exceed_scheduling.ll llvm/test/Transforms/SLPVectorizer/extract-many-users-buildvector.ll llvm/test/Transforms/SLPVectorizer/insertelement-postpone.llThe following files introduce new uses of undef:
Undef is now deprecated and should only be used in the rare cases where no replacement is possible. For example, a load of uninitialized memory yields In tests, avoid using For example, this is considered a bad practice: define void @fn() {
...
br i1 undef, ...
}Please use the following instead: define void @fn(i1 %cond) {
...
br i1 %cond, ...
}Please refer to the Undefined Behavior Manual for more information. |
|
@alexey-bataev sorry I've lost track of your SLP patches - please can you ping any tickets you think are ready (and if there's any preferred order you have for them). |
We can start with this one, then #162018, then #153589, then #150450. I'll try to update these patches |
| SmallDenseMap<const TreeEntry *, unsigned> OrderedEntriesCount; | ||
| ArrayRef<TreeEntry *> Entries = SLP.getTreeEntries(User); | ||
| if (Entries.empty()) | ||
| return false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this looks like a nfc earlyout that could be pulled out into a pre-commit?
Created using spr 1.3.7
Patch enables support for float point math operations as base
instructions for copyable elements. It also fixes some scheduling
issues, found during testing