Skip to content

Commit af13804

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.7
1 parent 2e67f5c commit af13804

File tree

2 files changed: 15 additions and 24 deletions.

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -10620,7 +10620,8 @@ class InstructionsCompatibilityAnalysis {
1062010620
/// Checks if the opcode is supported as the main opcode for copyable
1062110621
/// elements.
1062210622
static bool isSupportedOpcode(const unsigned Opcode) {
10623-
return Opcode == Instruction::Add || Opcode == Instruction::LShr;
10623+
return Opcode == Instruction::Add || Opcode == Instruction::LShr ||
10624+
Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
1062410625
}
1062510626

1062610627
/// Identifies the best candidate value, which represents main opcode
@@ -10937,6 +10938,8 @@ class InstructionsCompatibilityAnalysis {
1093710938
switch (MainOpcode) {
1093810939
case Instruction::Add:
1093910940
case Instruction::LShr:
10941+
case Instruction::SDiv:
10942+
case Instruction::UDiv:
1094010943
VectorCost = TTI.getArithmeticInstrCost(MainOpcode, VecTy, Kind);
1094110944
break;
1094210945
default:
@@ -22062,8 +22065,10 @@ bool BoUpSLP::collectValuesToDemote(
2206222065
auto Checker = [&](unsigned BitWidth, unsigned OrigBitWidth) {
2206322066
assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!");
2206422067
return all_of(E.Scalars, [&](Value *V) {
22065-
auto *I = cast<Instruction>(V);
2206622068
APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth);
22069+
if (E.hasCopyableElements() && E.isCopyableElement(V))
22070+
return MaskedValueIsZero(V, Mask, SimplifyQuery(*DL));
22071+
auto *I = cast<Instruction>(V);
2206722072
return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) &&
2206822073
MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL));
2206922074
});

llvm/test/Transforms/SLPVectorizer/X86/no_alternate_divrem.ll

Lines changed: 8 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -4,21 +4,14 @@
44
define void @test_add_sdiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
55
; CHECK-LABEL: @test_add_sdiv(
66
; CHECK-NEXT: entry:
7-
; CHECK-NEXT: [[GEP2_2:%.*]] = getelementptr i32, ptr [[ARR2:%.*]], i32 2
8-
; CHECK-NEXT: [[GEP2_3:%.*]] = getelementptr i32, ptr [[ARR2]], i32 3
9-
; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP2_2]], align 4
10-
; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP2_3]], align 4
11-
; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
12-
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR2]], align 4
7+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4
8+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2
9+
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1>
1310
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
1411
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
1512
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
1613
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
17-
; CHECK-NEXT: [[RES2:%.*]] = sdiv i32 [[V2]], [[Y2]]
18-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
19-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
20-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
21-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
14+
; CHECK-NEXT: [[TMP8:%.*]] = sdiv <4 x i32> [[TMP0]], [[TMP6]]
2215
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
2316
; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR3:%.*]], align 4
2417
; CHECK-NEXT: ret void
@@ -58,21 +51,14 @@ entry:
5851
define void @test_add_udiv(ptr %arr1, ptr %arr2, i32 %a0, i32 %a1, i32 %a2, i32 %a3) {
5952
; CHECK-LABEL: @test_add_udiv(
6053
; CHECK-NEXT: entry:
61-
; CHECK-NEXT: [[GEP1_2:%.*]] = getelementptr i32, ptr [[ARR1:%.*]], i32 2
62-
; CHECK-NEXT: [[GEP1_3:%.*]] = getelementptr i32, ptr [[ARR1]], i32 3
63-
; CHECK-NEXT: [[V2:%.*]] = load i32, ptr [[GEP1_2]], align 4
64-
; CHECK-NEXT: [[V3:%.*]] = load i32, ptr [[GEP1_3]], align 4
65-
; CHECK-NEXT: [[Y2:%.*]] = add nsw i32 [[A2:%.*]], 42
66-
; CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[ARR1]], align 4
54+
; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[ARR1:%.*]], align 4
55+
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[A2:%.*]], i32 2
56+
; CHECK-NEXT: [[TMP6:%.*]] = add <4 x i32> [[TMP5]], <i32 1, i32 1, i32 42, i32 1>
6757
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, i32 [[A0:%.*]], i32 0
6858
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[A1:%.*]], i32 1
6959
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <4 x i32> [[TMP2]], i32 [[A3:%.*]], i32 3
7060
; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> <i32 1146, i32 146, i32 0, i32 0>, [[TMP3]]
71-
; CHECK-NEXT: [[RES2:%.*]] = udiv i32 [[V2]], [[Y2]]
72-
; CHECK-NEXT: [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[RES2]], i32 2
73-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[V3]], i32 3
74-
; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP0]], <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 poison, i32 poison>
75-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
61+
; CHECK-NEXT: [[TMP8:%.*]] = udiv <4 x i32> [[TMP0]], [[TMP6]]
7662
; CHECK-NEXT: [[TMP9:%.*]] = add nsw <4 x i32> [[TMP8]], [[TMP4]]
7763
; CHECK-NEXT: store <4 x i32> [[TMP9]], ptr [[ARR2:%.*]], align 4
7864
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)