Skip to content

Commit 964c771

Browse files
committed
[SLP]Fix the minbitwidth analysis for alternate opcodes
If the alternate operation is stricter than the main operation, we cannot rely on the analysis of the main operation. In such a case, it is better to avoid doing the analysis at all, since it may affect the overall result and lead to incorrect optimization. Fixes llvm#165878
1 parent d310693 commit 964c771

File tree

2 files changed

+27
-8
lines changed

2 files changed

+27
-8
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22134,6 +22134,27 @@ bool BoUpSLP::collectValuesToDemote(
2213422134
{VectorizableTree[E.CombinedEntriesWithIndices.front().first].get(),
2213522135
VectorizableTree[E.CombinedEntriesWithIndices.back().first].get()});
2213622136

22137+
if (E.isAltShuffle()) {
22138+
// Combining these opcodes may lead to incorrect analysis, skip for now.
22139+
auto IsDangerousOpcode = [](unsigned Opcode) {
22140+
switch (Opcode) {
22141+
case Instruction::Shl:
22142+
case Instruction::AShr:
22143+
case Instruction::LShr:
22144+
case Instruction::UDiv:
22145+
case Instruction::SDiv:
22146+
case Instruction::URem:
22147+
case Instruction::SRem:
22148+
return true;
22149+
default:
22150+
break;
22151+
}
22152+
return false;
22153+
};
22154+
if (IsDangerousOpcode(E.getAltOpcode()))
22155+
return FinalAnalysis();
22156+
}
22157+
2213722158
switch (E.getOpcode()) {
2213822159

2213922160
// We can always demote truncations and extensions. Since truncations can

llvm/test/Transforms/SLPVectorizer/X86/alternate-opcode-strict-bitwidth-than-main.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,12 @@ define float @test(i8 %0) {
66
; CHECK-SAME: i8 [[TMP0:%.*]]) {
77
; CHECK-NEXT: [[ENTRY:.*:]]
88
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x i8> <i8 poison, i8 0>, i8 [[TMP0]], i32 0
9-
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i16>
10-
; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i16> [[TMP2]], <i16 2, i16 27>
11-
; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i16> [[TMP2]], <i16 2, i16 27>
12-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> [[TMP4]], <2 x i32> <i32 0, i32 3>
13-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x i16> [[TMP5]], i32 0
14-
; CHECK-NEXT: [[TMP6:%.*]] = sext i16 [[TMP9]] to i32
15-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x i16> [[TMP5]], i32 1
16-
; CHECK-NEXT: [[TMP7:%.*]] = zext i16 [[TMP10]] to i32
9+
; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i8> [[TMP1]] to <2 x i32>
10+
; CHECK-NEXT: [[TMP3:%.*]] = mul <2 x i32> [[TMP2]], <i32 2, i32 27>
11+
; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[TMP2]], <i32 2, i32 27>
12+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x i32> [[TMP3]], <2 x i32> [[TMP4]], <2 x i32> <i32 0, i32 3>
13+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x i32> [[TMP5]], i32 0
14+
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1
1715
; CHECK-NEXT: [[TMP8:%.*]] = or i32 [[TMP6]], [[TMP7]]
1816
; CHECK-NEXT: switch i32 [[TMP8]], label %[[EXIT:.*]] [
1917
; CHECK-NEXT: i32 0, label %[[EXIT]]

0 commit comments

Comments
 (0)