Skip to content

Commit d1a7225

Browse files
committed
[SLP]Check if the node must keep its original bitwidth
We need to check whether, during previous analysis, the node was requested to keep its original bitwidth, in order to avoid incorrect codegen. Fixes llvm#120076
1 parent 2402bcc commit d1a7225

File tree

2 files changed

+24
-18
lines changed

2 files changed

+24
-18
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 19 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2931,13 +2931,11 @@ class BoUpSLP {
29312931
/// truncation. We collect the entries that will be demoted in ToDemote.
29322932
/// \param E Node for analysis
29332933
/// \param ToDemote indices of the nodes to be demoted.
2934-
bool collectValuesToDemote(const TreeEntry &E, bool IsProfitableToDemoteRoot,
2935-
unsigned &BitWidth,
2936-
SmallVectorImpl<unsigned> &ToDemote,
2937-
DenseSet<const TreeEntry *> &Visited,
2938-
unsigned &MaxDepthLevel,
2939-
bool &IsProfitableToDemote,
2940-
bool IsTruncRoot) const;
2934+
bool collectValuesToDemote(
2935+
const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
2936+
SmallVectorImpl<unsigned> &ToDemote, DenseSet<const TreeEntry *> &Visited,
2937+
const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
2938+
bool &IsProfitableToDemote, bool IsTruncRoot) const;
29412939

29422940
/// Check if the operands on the edges \p Edges of the \p UserTE allows
29432941
/// reordering (i.e. the operands can be reordered because they have only one
@@ -17515,8 +17513,8 @@ unsigned BoUpSLP::getVectorElementSize(Value *V) {
1751517513
bool BoUpSLP::collectValuesToDemote(
1751617514
const TreeEntry &E, bool IsProfitableToDemoteRoot, unsigned &BitWidth,
1751717515
SmallVectorImpl<unsigned> &ToDemote, DenseSet<const TreeEntry *> &Visited,
17518-
unsigned &MaxDepthLevel, bool &IsProfitableToDemote,
17519-
bool IsTruncRoot) const {
17516+
const SmallDenseSet<unsigned, 8> &NodesToKeepBWs, unsigned &MaxDepthLevel,
17517+
bool &IsProfitableToDemote, bool IsTruncRoot) const {
1752017518
// We can always demote constants.
1752117519
if (all_of(E.Scalars, IsaPred<Constant>))
1752217520
return true;
@@ -17528,6 +17526,10 @@ bool BoUpSLP::collectValuesToDemote(
1752817526
return true;
1752917527
}
1753017528

17529+
// Check if the node was analyzed already and must keep its original bitwidth.
17530+
if (NodesToKeepBWs.contains(E.Idx))
17531+
return false;
17532+
1753117533
// If the value is not a vectorized instruction in the expression and not used
1753217534
// by the insertelement instruction and not used in multiple vector nodes, it
1753317535
// cannot be demoted.
@@ -17623,8 +17625,8 @@ bool BoUpSLP::collectValuesToDemote(
1762317625
for (const TreeEntry *Op : Operands) {
1762417626
unsigned Level = InitLevel;
1762517627
if (!collectValuesToDemote(*Op, IsProfitableToDemoteRoot, BitWidth,
17626-
ToDemote, Visited, Level, IsProfitableToDemote,
17627-
IsTruncRoot)) {
17628+
ToDemote, Visited, NodesToKeepBWs, Level,
17629+
IsProfitableToDemote, IsTruncRoot)) {
1762817630
if (!IsProfitableToDemote)
1762917631
return false;
1763017632
NeedToExit = true;
@@ -17926,6 +17928,7 @@ void BoUpSLP::computeMinimumValueSizes() {
1792617928
bool IsTruncRoot = false;
1792717929
bool IsProfitableToDemoteRoot = !IsStoreOrInsertElt;
1792817930
SmallVector<unsigned> RootDemotes;
17931+
SmallDenseSet<unsigned, 8> NodesToKeepBWs;
1792917932
if (NodeIdx != 0 &&
1793017933
VectorizableTree[NodeIdx]->State == TreeEntry::Vectorize &&
1793117934
VectorizableTree[NodeIdx]->getOpcode() == Instruction::Trunc) {
@@ -17949,6 +17952,7 @@ void BoUpSLP::computeMinimumValueSizes() {
1794917952
// Check if the root is trunc and the next node is gather/buildvector, then
1795017953
// keep trunc in scalars, which is free in most cases.
1795117954
if (E.isGather() && IsTruncRoot && E.UserTreeIndices.size() == 1 &&
17955+
!NodesToKeepBWs.contains(E.Idx) &&
1795217956
E.Idx > (IsStoreOrInsertElt ? 2u : 1u) &&
1795317957
all_of(E.Scalars, [&](Value *V) {
1795417958
return V->hasOneUse() || isa<Constant>(V) ||
@@ -18071,8 +18075,8 @@ void BoUpSLP::computeMinimumValueSizes() {
1807118075
bool NeedToDemote = IsProfitableToDemote;
1807218076

1807318077
if (!collectValuesToDemote(E, IsProfitableToDemoteRoot, MaxBitWidth,
18074-
ToDemote, Visited, MaxDepthLevel, NeedToDemote,
18075-
IsTruncRoot) ||
18078+
ToDemote, Visited, NodesToKeepBWs, MaxDepthLevel,
18079+
NeedToDemote, IsTruncRoot) ||
1807618080
(MaxDepthLevel <= Limit &&
1807718081
!(((Opcode == Instruction::SExt || Opcode == Instruction::ZExt) &&
1807818082
(!IsTopRoot || !(IsStoreOrInsertElt || UserIgnoreList) ||
@@ -18206,14 +18210,15 @@ void BoUpSLP::computeMinimumValueSizes() {
1820618210
});
1820718211
}
1820818212

18209-
// If the maximum bit width we compute is less than the with of the roots'
18213+
// If the maximum bit width we compute is less than the width of the roots'
1821018214
// type, we can proceed with the narrowing. Otherwise, do nothing.
1821118215
if (MaxBitWidth == 0 ||
1821218216
MaxBitWidth >=
1821318217
cast<IntegerType>(TreeRoot.front()->getType()->getScalarType())
1821418218
->getBitWidth()) {
1821518219
if (UserIgnoreList)
1821618220
AnalyzedMinBWVals.insert(TreeRoot.begin(), TreeRoot.end());
18221+
NodesToKeepBWs.insert(ToDemote.begin(), ToDemote.end());
1821718222
continue;
1821818223
}
1821918224

llvm/test/Transforms/SLPVectorizer/X86/minbw-node-used-twice.ll

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@ define i8 @test() {
66
; CHECK-NEXT: [[ENTRY:.*:]]
77
; CHECK-NEXT: [[SUB_I_I79_PEEL_I:%.*]] = sub i16 0, 1
88
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i16> <i16 poison, i16 0>, i16 [[SUB_I_I79_PEEL_I]], i32 0
9-
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i16> zeroinitializer, [[TMP0]]
10-
; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i16>
11-
; CHECK-NEXT: [[TMP3:%.*]] = or <2 x i16> [[TMP2]], [[TMP0]]
12-
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <2 x i16> [[TMP3]], [[TMP0]]
9+
; CHECK-NEXT: [[TMP2:%.*]] = zext <2 x i16> [[TMP0]] to <2 x i32>
10+
; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i32> zeroinitializer, [[TMP2]]
11+
; CHECK-NEXT: [[TMP3:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
12+
; CHECK-NEXT: [[TMP4:%.*]] = or <2 x i32> [[TMP3]], [[TMP2]]
13+
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq <2 x i32> [[TMP4]], [[TMP2]]
1314
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
1415
; CHECK-NEXT: [[CONV13_I89_PEEL_I:%.*]] = zext i1 [[TMP5]] to i8
1516
; CHECK-NEXT: ret i8 [[CONV13_I89_PEEL_I]]

0 commit comments

Comments
 (0)