Skip to content

Commit a776618

Browse files
committed
[𝘀𝗽𝗿] initial version
Created using spr 1.3.5
1 parent 61a7289 commit a776618

File tree

9 files changed

+137
-97
lines changed

9 files changed

+137
-97
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -15353,15 +15353,14 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1535315353
constexpr TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput;
1535415354
InstructionCost Cost;
1535515355
auto EstimateInsertCost = [&](unsigned I, Value *V) {
15356-
if (V->getType() != ScalarTy) {
15356+
DemandedElements.setBit(I);
15357+
if (V->getType() != ScalarTy)
1535715358
Cost += TTI->getCastInstrCost(Instruction::Trunc, ScalarTy, V->getType(),
1535815359
TTI::CastContextHint::None, CostKind);
15359-
V = nullptr;
15360-
}
15361-
if (!ForPoisonSrc)
15362-
DemandedElements.setBit(I);
1536315360
};
1536415361
SmallVector<int> ShuffleMask(VL.size(), PoisonMaskElem);
15362+
SmallVector<int> ConstantShuffleMask(VL.size(), PoisonMaskElem);
15363+
std::iota(ConstantShuffleMask.begin(), ConstantShuffleMask.end(), 0);
1536515364
for (unsigned I = 0, E = VL.size(); I < E; ++I) {
1536615365
Value *V = VL[I];
1536715366
// No need to shuffle duplicates for constants.
@@ -15371,6 +15370,11 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1537115370
continue;
1537215371
}
1537315372

15373+
if (isConstant(V)) {
15374+
ConstantShuffleMask[I] = I + E;
15375+
ShuffleMask[I] = I;
15376+
continue;
15377+
}
1537415378
auto Res = UniqueElements.try_emplace(V, I);
1537515379
if (Res.second) {
1537615380
EstimateInsertCost(I, V);
@@ -15382,18 +15386,28 @@ InstructionCost BoUpSLP::getGatherCost(ArrayRef<Value *> VL, bool ForPoisonSrc,
1538215386
ShuffledElements.setBit(I);
1538315387
ShuffleMask[I] = Res.first->second;
1538415388
}
15385-
if (ForPoisonSrc) {
15386-
Cost = getScalarizationOverhead(*TTI, ScalarTy, VecTy,
15387-
/*DemandedElts*/ ~ShuffledElements,
15388-
/*Insert*/ true,
15389-
/*Extract*/ false, CostKind,
15390-
/*ForPoisonSrc=*/true, VL);
15391-
} else if (!DemandedElements.isZero()) {
15389+
// FIXME: add a cost for constant vector materialization.
15390+
bool IsAnyNonUndefConst =
15391+
any_of(VL, [](Value *V) { return !isa<UndefValue>(V) && isConstant(V); });
15392+
// 1. Shuffle input source vector and constant vector.
15393+
if (!ForPoisonSrc && IsAnyNonUndefConst) {
15394+
Cost += ::getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteTwoSrc,
15395+
VecTy, ConstantShuffleMask);
15396+
for (auto [Idx, I] : enumerate(ShuffleMask)) {
15397+
if (I == PoisonMaskElem)
15398+
I = Idx;
15399+
else
15400+
I += VL.size();
15401+
}
15402+
}
15403+
15404+
// 2. Insert unique non-constants.
15405+
if (!DemandedElements.isZero())
1539215406
Cost += getScalarizationOverhead(*TTI, ScalarTy, VecTy, DemandedElements,
1539315407
/*Insert=*/true,
1539415408
/*Extract=*/false, CostKind,
15395-
/*ForPoisonSrc=*/false, VL);
15396-
}
15409+
ForPoisonSrc && !IsAnyNonUndefConst, VL);
15410+
// 3. Shuffle duplicates.
1539715411
if (DuplicateNonConst)
1539815412
Cost += ::getShuffleCost(*TTI, TargetTransformInfo::SK_PermuteSingleSrc,
1539915413
VecTy, ShuffleMask);

llvm/test/Transforms/SLPVectorizer/SystemZ/vec-elt-insertion.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -114,13 +114,13 @@ define void @fun2(ptr %0, ptr %Dst) {
114114
; CHECK: [[BB4]]:
115115
; CHECK-NEXT: ret void
116116
; CHECK: [[BB5]]:
117-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 24
118-
; CHECK-NEXT: store i64 [[TMP2]], ptr [[TMP6]], align 8
119117
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 16
120-
; CHECK-NEXT: store i64 0, ptr [[TMP7]], align 8
118+
; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x i64> <i64 0, i64 poison>, i64 [[TMP2]], i32 1
119+
; CHECK-NEXT: store <2 x i64> [[TMP8]], ptr [[TMP7]], align 8
121120
; CHECK-NEXT: br label %[[BB4]]
122121
;
123-
; REMARK-NOT: Function: fun2
122+
; Looks like there is a bug in TTI: insertion into index 1 is free, while insertion into index 0 has a cost of 1.
123+
; REMARK: Function: fun2
124124

125125
%3 = load i64, ptr %0, align 8
126126
%4 = icmp eq i64 %3, 0

llvm/test/Transforms/SLPVectorizer/X86/lookahead.ll

Lines changed: 30 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -520,22 +520,36 @@ define i1 @ExtractIdxNotConstantInt2(float %a, float %b, float %c, <4 x float> %
520520

521521

522522
define i1 @foo(float %a, float %b, float %c, <4 x float> %vec, i64 %idx2) {
523-
; CHECK-LABEL: @foo(
524-
; CHECK-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0
525-
; CHECK-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
526-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0
527-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[C:%.*]], i32 1
528-
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[VEC]], <4 x float> poison, <2 x i32> <i32 poison, i32 1>
529-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[SUB14_I167]], i32 0
530-
; CHECK-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
531-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> <float poison, float 3.000000e+01>, float [[B:%.*]], i32 0
532-
; CHECK-NEXT: [[TMP7:%.*]] = fsub <2 x float> [[TMP5]], [[TMP6]]
533-
; CHECK-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP7]], <float 1.000000e+01, float 2.000000e+00>
534-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
535-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
536-
; CHECK-NEXT: [[MUL123_I184:%.*]] = fmul float [[TMP9]], [[TMP10]]
537-
; CHECK-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
538-
; CHECK-NEXT: ret i1 [[CMP_I185]]
523+
; SSE-LABEL: @foo(
524+
; SSE-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0
525+
; SSE-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
526+
; SSE-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[A:%.*]], i32 0
527+
; SSE-NEXT: [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[C:%.*]], i32 1
528+
; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[VEC]], <4 x float> poison, <2 x i32> <i32 poison, i32 1>
529+
; SSE-NEXT: [[TMP4:%.*]] = insertelement <2 x float> [[TMP3]], float [[SUB14_I167]], i32 0
530+
; SSE-NEXT: [[TMP5:%.*]] = fmul <2 x float> [[TMP2]], [[TMP4]]
531+
; SSE-NEXT: [[TMP6:%.*]] = insertelement <2 x float> <float poison, float 3.000000e+01>, float [[B:%.*]], i32 0
532+
; SSE-NEXT: [[TMP7:%.*]] = fsub <2 x float> [[TMP5]], [[TMP6]]
533+
; SSE-NEXT: [[TMP8:%.*]] = fadd <2 x float> [[TMP7]], <float 1.000000e+01, float 2.000000e+00>
534+
; SSE-NEXT: [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
535+
; SSE-NEXT: [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
536+
; SSE-NEXT: [[MUL123_I184:%.*]] = fmul float [[TMP9]], [[TMP10]]
537+
; SSE-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
538+
; SSE-NEXT: ret i1 [[CMP_I185]]
539+
;
540+
; AVX-LABEL: @foo(
541+
; AVX-NEXT: [[VECEXT_I291_I166:%.*]] = extractelement <4 x float> [[VEC:%.*]], i64 0
542+
; AVX-NEXT: [[SUB14_I167:%.*]] = fsub float undef, [[VECEXT_I291_I166]]
543+
; AVX-NEXT: [[FM:%.*]] = fmul float [[A:%.*]], [[SUB14_I167]]
544+
; AVX-NEXT: [[SUB25_I168:%.*]] = fsub float [[FM]], [[B:%.*]]
545+
; AVX-NEXT: [[VECEXT_I276_I169:%.*]] = extractelement <4 x float> [[VEC]], i64 1
546+
; AVX-NEXT: [[ADD36_I173:%.*]] = fadd float [[SUB25_I168]], 1.000000e+01
547+
; AVX-NEXT: [[MUL72_I179:%.*]] = fmul float [[C:%.*]], [[VECEXT_I276_I169]]
548+
; AVX-NEXT: [[ADD78_I180:%.*]] = fsub float [[MUL72_I179]], 3.000000e+01
549+
; AVX-NEXT: [[ADD79_I181:%.*]] = fadd float 2.000000e+00, [[ADD78_I180]]
550+
; AVX-NEXT: [[MUL123_I184:%.*]] = fmul float [[ADD36_I173]], [[ADD79_I181]]
551+
; AVX-NEXT: [[CMP_I185:%.*]] = fcmp ogt float [[MUL123_I184]], 0.000000e+00
552+
; AVX-NEXT: ret i1 [[CMP_I185]]
539553
;
540554
%vecext.i291.i166 = extractelement <4 x float> %vec, i64 0
541555
%sub14.i167 = fsub float undef, %vecext.i291.i166

llvm/test/Transforms/SLPVectorizer/X86/multi-incoming-blocks-in-phi.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,8 @@ define void @foo(ptr %arg) {
2626
; CHECK-NEXT: [[PHI:%.*]] = phi float [ 4.000000e+00, %[[BB]] ], [ 0.000000e+00, %[[BB27:.*]] ]
2727
; CHECK-NEXT: [[FADD8:%.*]] = fadd float 0.000000e+00, 0.000000e+00
2828
; CHECK-NEXT: [[FADD9:%.*]] = fadd float [[PHI]], 1.000000e+00
29-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[FADD9]], i32 0
30-
; CHECK-NEXT: [[TMP1:%.*]] = fadd <2 x float> <float 1.000000e+00, float 0.000000e+00>, [[TMP0]]
31-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
29+
; CHECK-NEXT: [[TMP2:%.*]] = fadd float [[FADD9]], 1.000000e+00
30+
; CHECK-NEXT: [[FADD11:%.*]] = fadd float 0.000000e+00, 0.000000e+00
3231
; CHECK-NEXT: [[FREM:%.*]] = frem float [[TMP2]], 7.000000e+00
3332
; CHECK-NEXT: [[CALL12:%.*]] = call i32 @llvm.x86.sse.cvttss2si(<4 x float> zeroinitializer)
3433
; CHECK-NEXT: switch i32 [[CALL12]], label %[[BB13:.*]] [
@@ -60,7 +59,6 @@ define void @foo(ptr %arg) {
6059
; CHECK-NEXT: br label %[[BB20:.*]]
6160
; CHECK: [[BB20]]:
6261
; CHECK-NEXT: [[FADD21:%.*]] = fadd float [[FADD18]], 1.000000e+00
63-
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> <float poison, float 0.000000e+00>, float [[FADD21]], i32 0
6462
; CHECK-NEXT: switch i32 0, label %[[BB22:.*]] [
6563
; CHECK-NEXT: i32 125, label %[[BB30]]
6664
; CHECK-NEXT: i32 98, label %[[BB30]]
@@ -71,8 +69,8 @@ define void @foo(ptr %arg) {
7169
; CHECK-NEXT: i32 121, label %[[BB30]]
7270
; CHECK-NEXT: ]
7371
; CHECK: [[BB22]]:
74-
; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x float> <float 1.000000e+00, float 0.000000e+00>, [[TMP3]]
75-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP4]], i32 0
72+
; CHECK-NEXT: [[TMP5:%.*]] = fadd float [[FADD21]], 1.000000e+00
73+
; CHECK-NEXT: [[TMP6:%.*]] = fadd float 0.000000e+00, 0.000000e+00
7674
; CHECK-NEXT: [[FREM25:%.*]] = frem float [[TMP5]], 7.000000e+00
7775
; CHECK-NEXT: [[FMUL26:%.*]] = fmul float [[FREM25]], 5.000000e+00
7876
; CHECK-NEXT: switch i32 0, label %[[BB27]] [
@@ -86,11 +84,11 @@ define void @foo(ptr %arg) {
8684
; CHECK-NEXT: ]
8785
; CHECK: [[BB27]]:
8886
; CHECK-NEXT: [[FADD28:%.*]] = fadd float [[TMP5]], 1.000000e+00
89-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x float> [[TMP4]], i32 1
9087
; CHECK-NEXT: [[FADD29:%.*]] = fadd float [[TMP6]], 0.000000e+00
9188
; CHECK-NEXT: br label %[[BB7]]
9289
; CHECK: [[BB30]]:
93-
; CHECK-NEXT: [[TMP7:%.*]] = phi <2 x float> [ [[TMP1]], %[[BB7]] ], [ [[TMP1]], %[[BB7]] ], [ [[TMP1]], %[[BB7]] ], [ [[TMP1]], %[[BB7]] ], [ [[TMP1]], %[[BB7]] ], [ [[TMP1]], %[[BB7]] ], [ [[TMP1]], %[[BB7]] ], [ zeroinitializer, %[[BB13]] ], [ zeroinitializer, %[[BB13]] ], [ zeroinitializer, %[[BB13]] ], [ zeroinitializer, %[[BB13]] ], [ zeroinitializer, %[[BB13]] ], [ zeroinitializer, %[[BB13]] ], [ zeroinitializer, %[[BB13]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP3]], %[[BB20]] ], [ [[TMP4]], %[[BB22]] ], [ [[TMP4]], %[[BB22]] ], [ [[TMP4]], %[[BB22]] ], [ [[TMP4]], %[[BB22]] ], [ [[TMP4]], %[[BB22]] ], [ [[TMP4]], %[[BB22]] ], [ [[TMP4]], %[[BB22]] ]
90+
; CHECK-NEXT: [[PHI31:%.*]] = phi float [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB7]] ], [ [[TMP2]], %[[BB7]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB13]] ], [ [[FADD21]], %[[BB20]] ], [ [[FADD21]], %[[BB20]] ], [ [[FADD21]], %[[BB20]] ], [ [[FADD21]], %[[BB20]] ], [ [[FADD21]], %[[BB20]] ], [ [[FADD21]], %[[BB20]] ], [ [[FADD21]], %[[BB20]] ], [ [[TMP5]], %[[BB22]] ], [ [[TMP5]], %[[BB22]] ], [ [[TMP5]], %[[BB22]] ], [ [[TMP5]], %[[BB22]] ], [ [[TMP5]], %[[BB22]] ], [ [[TMP5]], %[[BB22]] ], [ [[TMP5]], %[[BB22]] ]
91+
; CHECK-NEXT: [[PHI32:%.*]] = phi float [ [[FADD11]], %[[BB7]] ], [ [[FADD11]], %[[BB7]] ], [ [[FADD11]], %[[BB7]] ], [ [[FADD11]], %[[BB7]] ], [ [[FADD11]], %[[BB7]] ], [ [[FADD11]], %[[BB7]] ], [ [[FADD11]], %[[BB7]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB13]] ], [ 0.000000e+00, %[[BB20]] ], [ 0.000000e+00, %[[BB20]] ], [ 0.000000e+00, %[[BB20]] ], [ 0.000000e+00, %[[BB20]] ], [ 0.000000e+00, %[[BB20]] ], [ 0.000000e+00, %[[BB20]] ], [ 0.000000e+00, %[[BB20]] ], [ [[TMP6]], %[[BB22]] ], [ [[TMP6]], %[[BB22]] ], [ [[TMP6]], %[[BB22]] ], [ [[TMP6]], %[[BB22]] ], [ [[TMP6]], %[[BB22]] ], [ [[TMP6]], %[[BB22]] ], [ [[TMP6]], %[[BB22]] ]
9492
; CHECK-NEXT: ret void
9593
;
9694
bb:

llvm/test/Transforms/SLPVectorizer/X86/reduced-val-extracted-and-externally-used.ll

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -5,28 +5,37 @@ define void @test(i32 %arg) {
55
; CHECK-LABEL: define void @test(
66
; CHECK-SAME: i32 [[ARG:%.*]]) {
77
; CHECK-NEXT: [[BB:.*]]:
8-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[ARG]], i32 0
98
; CHECK-NEXT: br label %[[BB1:.*]]
109
; CHECK: [[BB1]]:
1110
; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP5:%.*]], %[[BB1]] ]
12-
; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[TMP6:%.*]], %[[BB1]] ]
13-
; CHECK-NEXT: [[PHI3:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[OP_RDX4:%.*]], %[[BB1]] ]
14-
; CHECK-NEXT: [[TMP1:%.*]] = phi <2 x i32> [ zeroinitializer, %[[BB]] ], [ [[TMP4:%.*]], %[[BB1]] ]
15-
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP1]], <2 x i32> poison, <8 x i32> <i32 0, i32 0, i32 1, i32 0, i32 0, i32 1, i32 0, i32 0>
11+
; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[ADD24:%.*]], %[[BB1]] ]
12+
; CHECK-NEXT: [[PHI3:%.*]] = phi i32 [ 0, %[[BB]] ], [ [[XOR26:%.*]], %[[BB1]] ]
1613
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[PHI2]], 0
1714
; CHECK-NEXT: [[ADD4:%.*]] = add i32 [[PHI2]], 0
15+
; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[ADD]], [[ADD4]]
1816
; CHECK-NEXT: [[ADD23:%.*]] = add i32 [[PHI]], 0
1917
; CHECK-NEXT: [[ADD6:%.*]] = add i32 [[PHI2]], 0
20-
; CHECK-NEXT: [[TMP3:%.*]] = add <8 x i32> [[TMP2]], zeroinitializer
21-
; CHECK-NEXT: [[TMP4]] = add <2 x i32> [[TMP0]], <i32 0, i32 1>
22-
; CHECK-NEXT: [[TMP5]] = extractelement <2 x i32> [[TMP4]], i32 1
23-
; CHECK-NEXT: [[TMP6]] = extractelement <2 x i32> [[TMP4]], i32 0
24-
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.vector.reduce.xor.v8i32(<8 x i32> [[TMP3]])
25-
; CHECK-NEXT: [[OP_RDX:%.*]] = xor i32 [[TMP7]], [[ADD]]
26-
; CHECK-NEXT: [[OP_RDX1:%.*]] = xor i32 [[ADD4]], [[ADD6]]
27-
; CHECK-NEXT: [[OP_RDX2:%.*]] = xor i32 [[ADD23]], [[TMP6]]
18+
; CHECK-NEXT: [[ADD7:%.*]] = add i32 [[PHI2]], 0
19+
; CHECK-NEXT: [[XOR8:%.*]] = xor i32 [[ADD6]], [[XOR]]
20+
; CHECK-NEXT: [[XOR9:%.*]] = xor i32 [[XOR8]], [[ADD23]]
21+
; CHECK-NEXT: [[OP_RDX1:%.*]] = xor i32 [[XOR9]], [[ADD7]]
22+
; CHECK-NEXT: [[OP_RDX2:%.*]] = add i32 [[PHI]], 0
23+
; CHECK-NEXT: [[OP_RDX:%.*]] = add i32 [[PHI2]], 0
24+
; CHECK-NEXT: [[ADD13:%.*]] = add i32 [[PHI2]], 0
2825
; CHECK-NEXT: [[OP_RDX3:%.*]] = xor i32 [[OP_RDX]], [[OP_RDX1]]
29-
; CHECK-NEXT: [[OP_RDX4]] = xor i32 [[OP_RDX3]], [[OP_RDX2]]
26+
; CHECK-NEXT: [[OP_RDX4:%.*]] = xor i32 [[OP_RDX3]], [[OP_RDX2]]
27+
; CHECK-NEXT: [[XOR16:%.*]] = xor i32 [[OP_RDX4]], [[ADD13]]
28+
; CHECK-NEXT: [[ADD17:%.*]] = add i32 [[PHI]], 0
29+
; CHECK-NEXT: [[ADD18:%.*]] = add i32 [[PHI2]], 0
30+
; CHECK-NEXT: [[ADD19:%.*]] = add i32 [[PHI2]], 0
31+
; CHECK-NEXT: [[XOR20:%.*]] = xor i32 [[ADD18]], [[XOR16]]
32+
; CHECK-NEXT: [[XOR21:%.*]] = xor i32 [[XOR20]], [[ADD17]]
33+
; CHECK-NEXT: [[XOR22:%.*]] = xor i32 [[XOR21]], [[ADD19]]
34+
; CHECK-NEXT: [[ADD25:%.*]] = add i32 [[PHI2]], 0
35+
; CHECK-NEXT: [[ADD24]] = add i32 [[ARG]], 0
36+
; CHECK-NEXT: [[XOR25:%.*]] = xor i32 [[ADD25]], [[XOR22]]
37+
; CHECK-NEXT: [[XOR26]] = xor i32 [[XOR25]], [[ADD24]]
38+
; CHECK-NEXT: [[TMP5]] = add i32 1, 0
3039
; CHECK-NEXT: [[ICMP:%.*]] = icmp ult i32 [[TMP5]], 0
3140
; CHECK-NEXT: br label %[[BB1]]
3241
;

llvm/test/Transforms/SLPVectorizer/X86/replaced-external-in-reduction.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -5,21 +5,21 @@ define void @test(i32 %0, ptr %p) {
55
; CHECK-LABEL: define void @test(
66
; CHECK-SAME: i32 [[TMP0:%.*]], ptr [[P:%.*]]) {
77
; CHECK-NEXT: entry:
8-
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 0, i32 poison, i32 0, i32 0>, i32 [[TMP0]], i32 1
9-
; CHECK-NEXT: [[TMP2:%.*]] = xor <4 x i32> [[TMP1]], <i32 1, i32 0, i32 1, i32 1>
10-
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 1, i32 0>
11-
; CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP0]], 0
12-
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
8+
; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, i32 [[TMP0]], i32 3
9+
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i32> [[TMP1]], <i32 1, i32 1, i32 1, i32 0>
10+
; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i32> [[TMP5]], <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
11+
; CHECK-NEXT: [[OP_RDX:%.*]] = extractelement <8 x i32> [[TMP8]], i32 3
1312
; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[PH:%.*]]
1413
; CHECK: ph:
15-
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 poison, i32 0, i32 0, i32 0, i32 0, i32 0>, i32 [[TMP0]], i32 2
14+
; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 poison, i32 0, i32 0, i32 0>, i32 [[TMP0]], i32 4
1615
; CHECK-NEXT: br label [[EXIT]]
1716
; CHECK: exit:
1817
; CHECK-NEXT: [[TMP9:%.*]] = phi <8 x i32> [ [[TMP8]], [[ENTRY:%.*]] ], [ [[TMP6]], [[PH]] ]
1918
; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ [[TMP5]], [[ENTRY]] ], [ zeroinitializer, [[PH]] ]
20-
; CHECK-NEXT: [[OP_RDX:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP9]])
21-
; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP7]])
22-
; CHECK-NEXT: [[OP_RDX5:%.*]] = or i32 [[TMP10]], [[TMP3]]
19+
; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.vector.extract.v4i32.v8i32(<8 x i32> [[TMP9]], i64 0)
20+
; CHECK-NEXT: [[RDX_OP:%.*]] = or <4 x i32> [[TMP10]], [[TMP7]]
21+
; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.vector.insert.v8i32.v4i32(<8 x i32> [[TMP9]], <4 x i32> [[RDX_OP]], i64 0)
22+
; CHECK-NEXT: [[OP_RDX5:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP11]])
2323
; CHECK-NEXT: [[OP_RDX2:%.*]] = or i32 [[OP_RDX5]], [[OP_RDX]]
2424
; CHECK-NEXT: store i32 [[OP_RDX2]], ptr [[P]], align 4
2525
; CHECK-NEXT: ret void

llvm/test/Transforms/SLPVectorizer/X86/select-reduction-op.ll

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@
44
define i1 @src(i1 %cmp4.118.i) {
55
; CHECK-LABEL: define i1 @src(
66
; CHECK-SAME: i1 [[CMP4_118_I:%.*]]) {
7-
; CHECK-NEXT: [[CMP4_118_I_NOT:%.*]] = xor i1 [[CMP4_118_I]], true
8-
; CHECK-NEXT: [[TMP1:%.*]] = freeze <4 x i1> poison
7+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i1> <i1 poison, i1 true, i1 true, i1 true>, i1 [[CMP4_118_I]], i32 0
8+
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 poison, i1 poison, i1 poison>
9+
; CHECK-NEXT: [[DOTNOT7:%.*]] = xor i1 poison, true
10+
; CHECK-NEXT: [[TMP1:%.*]] = freeze <4 x i1> [[TMP5]]
911
; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP1]])
10-
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[CMP4_118_I_NOT]], i1 true, i1 [[TMP2]]
12+
; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP2]], i1 true, i1 [[DOTNOT7]]
1113
; CHECK-NEXT: [[TMP3:%.*]] = freeze i1 [[OP_RDX]]
1214
; CHECK-NEXT: [[OP_RDX1:%.*]] = select i1 [[TMP3]], i1 true, i1 poison
1315
; CHECK-NEXT: ret i1 [[OP_RDX1]]

llvm/test/Transforms/SLPVectorizer/X86/vectorize-cmps.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,12 @@
44
define i32 @test(ptr %isec, float %0) {
55
; CHECK-LABEL: @test(
66
; CHECK-NEXT: entry:
7-
; CHECK-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[ISEC:%.*]], align 4
8-
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> <float 0.000000e+00, float poison>, float [[TMP0:%.*]], i32 1
9-
; CHECK-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP2]], [[TMP1]]
7+
; CHECK-NEXT: [[TMP1:%.*]] = load float, ptr [[ISEC:%.*]], align 4
8+
; CHECK-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, ptr [[ISEC]], i64 1
9+
; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX10]], align 4
10+
; CHECK-NEXT: [[TMP5:%.*]] = fmul fast float [[TMP0:%.*]], [[TMP2]]
11+
; CHECK-NEXT: [[TMP4:%.*]] = fmul fast float 0.000000e+00, [[TMP1]]
1012
; CHECK-NEXT: [[CMP61:%.*]] = fcmp fast oge float 0.000000e+00, 0.000000e+00
11-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
12-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
1313
; CHECK-NEXT: [[CMP63:%.*]] = fcmp fast ogt float [[TMP4]], [[TMP5]]
1414
; CHECK-NEXT: br i1 [[CMP63]], label [[CLEANUP:%.*]], label [[IF_END:%.*]]
1515
; CHECK: if.end:

0 commit comments

Comments
 (0)