Skip to content

Commit 2c3aa92

Browse files
committed
[SLP]Fix setting of the insertion point for the nodes
Many of the def-use chain problems in the SLP vectorizer are related to the fact that some nodes reuse the same instruction as their insertion point. An insertion point is not an instruction, but the place between instructions. To set it correctly, it is better to generate a pseudo instruction immediately after the last instruction and use it as the insertion point. This resolves the issues in most cases. Fixes #168512 #168576
1 parent 4e275f7 commit 2c3aa92

File tree

5 files changed

+258
-4
lines changed

5 files changed

+258
-4
lines changed

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2082,6 +2082,7 @@ class slpvectorizer::BoUpSLP {
20822082
MustGather.clear();
20832083
NonScheduledFirst.clear();
20842084
EntryToLastInstruction.clear();
2085+
LastInstructionToPos.clear();
20852086
LoadEntriesToVectorize.clear();
20862087
IsGraphTransformMode = false;
20872088
GatheredLoadsEntriesFirst.reset();
@@ -4593,6 +4594,10 @@ class slpvectorizer::BoUpSLP {
45934594
/// pre-gather them before.
45944595
SmallDenseMap<const TreeEntry *, WeakTrackingVH> EntryToLastInstruction;
45954596

4597+
/// Keeps the mapping between the last instructions and their insertion
4598+
/// points, which is an instruction-after-the-last-instruction.
4599+
SmallDenseMap<const Instruction *, Instruction *> LastInstructionToPos;
4600+
45964601
/// List of gather nodes, depending on other gather/vector nodes, which should
45974602
/// be emitted after the vector instruction emission process to correctly
45984603
/// handle order of the vector instructions and shuffles.
@@ -17894,6 +17899,16 @@ void BoUpSLP::setInsertPointAfterBundle(const TreeEntry *E) {
1789417899
Builder.SetInsertPoint(
1789517900
LastInst->getParent(),
1789617901
LastInst->getNextNode()->getIterator());
17902+
if (Instruction *Res = LastInstructionToPos.lookup(LastInst)) {
17903+
Builder.SetInsertPoint(LastInst->getParent(), Res->getIterator());
17904+
} else {
17905+
Res = Builder.CreateAlignedLoad(Builder.getPtrTy(),
17906+
PoisonValue::get(Builder.getPtrTy()),
17907+
MaybeAlign());
17908+
Builder.SetInsertPoint(LastInst->getParent(), Res->getIterator());
17909+
eraseInstruction(Res);
17910+
LastInstructionToPos.try_emplace(LastInst, Res);
17911+
}
1789717912
}
1789817913
Builder.SetCurrentDebugLocation(Front->getDebugLoc());
1789917914
}

llvm/test/Transforms/SLPVectorizer/X86/gathered-node-with-in-order-parent.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,11 @@ define double @test() {
1010
; CHECK-NEXT: br label %[[BB4]]
1111
; CHECK: [[BB4]]:
1212
; CHECK-NEXT: [[MUL:%.*]] = mul i32 0, 1
13+
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 1>, i32 [[MUL]], i32 0
14+
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP0]], [[TMP4]]
1315
; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1416
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[MUL]], i32 0
1517
; CHECK-NEXT: [[TMP3]] = or <4 x i32> [[TMP1]], [[TMP2]]
16-
; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 1>, i32 [[MUL]], i32 0
17-
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i32> [[TMP0]], [[TMP4]]
1818
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
1919
; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP6]], 0
2020
; CHECK-NEXT: br i1 false, label %[[BB7:.*]], label %[[BB1]]
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -passes=slp-vectorizer -S -slp-threshold=-99999 -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define i32 @test() {
5+
; CHECK-LABEL: define i32 @test() {
6+
; CHECK-NEXT: [[BB:.*]]:
7+
; CHECK-NEXT: br label %[[BB1:.*]]
8+
; CHECK: [[BB1]]:
9+
; CHECK-NEXT: [[TMP0:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], %[[BB24:.*]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB]] ]
10+
; CHECK-NEXT: [[TMP1:%.*]] = phi <4 x i32> [ [[TMP13:%.*]], %[[BB24]] ], [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB]] ]
11+
; CHECK-NEXT: br i1 false, label %[[BB4:.*]], label %[[BB11:.*]]
12+
; CHECK: [[BB4]]:
13+
; CHECK-NEXT: [[TMP2:%.*]] = phi <2 x double> [ zeroinitializer, %[[BB1]] ]
14+
; CHECK-NEXT: [[TMP3:%.*]] = phi <4 x i32> [ [[TMP0]], %[[BB1]] ]
15+
; CHECK-NEXT: br label %[[BB19:.*]]
16+
; CHECK: [[BB11]]:
17+
; CHECK-NEXT: br i1 false, label %[[BB12:.*]], label %[[BB16:.*]]
18+
; CHECK: [[BB12]]:
19+
; CHECK-NEXT: [[TMP4:%.*]] = or <4 x i32> [[TMP1]], <i32 poison, i32 poison, i32 0, i32 0>
20+
; CHECK-NEXT: br label %[[BB13:.*]]
21+
; CHECK: [[BB13]]:
22+
; CHECK-NEXT: [[TMP5:%.*]] = phi <4 x i32> [ [[TMP4]], %[[BB12]] ]
23+
; CHECK-NEXT: br label %[[BB16]]
24+
; CHECK: [[BB16]]:
25+
; CHECK-NEXT: [[TMP6:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB11]] ], [ [[TMP5]], %[[BB13]] ]
26+
; CHECK-NEXT: br label %[[BB19]]
27+
; CHECK: [[BB19]]:
28+
; CHECK-NEXT: [[PHI22:%.*]] = phi double [ 0.000000e+00, %[[BB4]] ], [ 0.000000e+00, %[[BB16]] ]
29+
; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 0, i32 0>, %[[BB4]] ], [ [[TMP6]], %[[BB16]] ]
30+
; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i32> [[TMP7]], <i32 poison, i32 poison, i32 0, i32 0>
31+
; CHECK-NEXT: br label %[[BB24]]
32+
; CHECK: [[BB24]]:
33+
; CHECK-NEXT: [[TMP9:%.*]] = lshr <4 x i32> [[TMP8]], <i32 poison, i32 poison, i32 0, i32 0>
34+
; CHECK-NEXT: [[TMP10:%.*]] = and <4 x i32> [[TMP9]], <i32 poison, i32 poison, i32 -1, i32 0>
35+
; CHECK-NEXT: [[TMP11:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
36+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
37+
; CHECK-NEXT: [[TMP13]] = lshr <4 x i32> [[TMP11]], [[TMP12]]
38+
; CHECK-NEXT: [[TMP14:%.*]] = lshr <4 x i32> [[TMP11]], [[TMP10]]
39+
; CHECK-NEXT: [[TMP15:%.*]] = or <4 x i32> [[TMP11]], [[TMP10]]
40+
; CHECK-NEXT: [[TMP16]] = shufflevector <4 x i32> [[TMP14]], <4 x i32> [[TMP15]], <4 x i32> <i32 poison, i32 poison, i32 6, i32 3>
41+
; CHECK-NEXT: br label %[[BB1]]
42+
;
43+
bb:
44+
br label %bb1
45+
46+
bb1:
47+
%phi = phi i32 [ %lshr25, %bb24 ], [ 0, %bb ]
48+
%phi2 = phi i32 [ %or26, %bb24 ], [ 0, %bb ]
49+
%phi3 = phi i32 [ 0, %bb24 ], [ 0, %bb ]
50+
br i1 false, label %bb4, label %bb11
51+
52+
bb4:
53+
%phi6 = phi i32 [ poison, %bb1 ]
54+
%phi7 = phi i32 [ poison, %bb1 ]
55+
%phi9 = phi i32 [ %phi2, %bb1 ]
56+
%phi10 = phi i32 [ %phi, %bb1 ]
57+
%0 = phi <2 x double> [ zeroinitializer, %bb1 ]
58+
br label %bb19
59+
60+
bb11:
61+
br i1 false, label %bb12, label %bb16
62+
63+
bb12:
64+
%or = or i32 0, %phi3
65+
br label %bb13
66+
67+
bb13:
68+
%phi14 = phi i32 [ %phi, %bb12 ]
69+
%phi15 = phi i32 [ %or, %bb12 ]
70+
br label %bb16
71+
72+
bb16:
73+
%phi17 = phi i32 [ 0, %bb11 ], [ %phi14, %bb13 ]
74+
%phi18 = phi i32 [ 0, %bb11 ], [ %phi15, %bb13 ]
75+
br label %bb19
76+
77+
bb19:
78+
%phi20 = phi i32 [ 0, %bb4 ], [ %phi17, %bb16 ]
79+
%phi21 = phi i32 [ 0, %bb4 ], [ %phi18, %bb16 ]
80+
%phi22 = phi double [ 0.000000e+00, %bb4 ], [ 0.000000e+00, %bb16 ]
81+
%or23 = or i32 %phi21, 0
82+
br label %bb24
83+
84+
bb24:
85+
%lshr = lshr i32 %phi20, 0
86+
%and = and i32 %lshr, 0
87+
%lshr25 = lshr i32 %phi, %and
88+
%or26 = or i32 0, %or23
89+
br label %bb1
90+
}
91+
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s
3+
4+
define float @test() {
5+
; CHECK-LABEL: define float @test() {
6+
; CHECK-NEXT: [[LABEL:.*]]:
7+
; CHECK-NEXT: [[SUB_I102_I:%.*]] = fsub float 0.000000e+00, 0.000000e+00
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float poison, float poison>, float [[SUB_I102_I]], i32 2
9+
; CHECK-NEXT: [[TMP1:%.*]] = fmul <4 x float> [[TMP0]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison>
10+
; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> [[TMP1]], float 0.000000e+00, i32 1
11+
; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 1, i32 1>
12+
; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x float> [[TMP3]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
13+
; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float undef, float undef, float undef, float undef>, <8 x float> [[TMP4]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
14+
; CHECK-NEXT: [[TMP6:%.*]] = fmul <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP5]]
15+
; CHECK-NEXT: [[TMP7:%.*]] = fadd <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP6]]
16+
; CHECK-NEXT: [[TMP8:%.*]] = fadd <8 x float> [[TMP7]], <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>
17+
; CHECK-NEXT: [[TMP9:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <2 x i32> <i32 2, i32 poison>
18+
; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <2 x float> [[TMP9]], <2 x float> <float poison, float 1.000000e+00>, <2 x i32> <i32 0, i32 3>
19+
; CHECK-NEXT: [[TMP11:%.*]] = fmul <2 x float> zeroinitializer, [[TMP10]]
20+
; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison, i32 poison, i32 poison>
21+
; CHECK-NEXT: [[TMP13:%.*]] = shufflevector <8 x float> [[TMP12]], <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float undef, float undef, float undef>, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
22+
; CHECK-NEXT: [[TMP14:%.*]] = fmul <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP13]]
23+
; CHECK-NEXT: [[TMP15:%.*]] = fadd <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP14]]
24+
; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <8 x float> [[TMP15]], <8 x float> poison, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison>
25+
; CHECK-NEXT: [[TMP17:%.*]] = shufflevector <12 x float> [[TMP16]], <12 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float undef, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, <12 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
26+
; CHECK-NEXT: [[TMP18:%.*]] = fadd <2 x float> [[TMP11]], zeroinitializer
27+
; CHECK-NEXT: [[TMP19:%.*]] = shufflevector <2 x float> [[TMP18]], <2 x float> poison, <8 x i32> <i32 0, i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
28+
; CHECK-NEXT: [[TMP20:%.*]] = shufflevector <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison, float poison>, <8 x float> [[TMP19]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9>
29+
; CHECK-NEXT: [[TMP21:%.*]] = fsub <8 x float> [[TMP20]], [[TMP8]]
30+
; CHECK-NEXT: [[TMP22:%.*]] = fadd <12 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, [[TMP17]]
31+
; CHECK-NEXT: [[TMP23:%.*]] = shufflevector <12 x float> [[TMP22]], <12 x float> poison, <20 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
32+
; CHECK-NEXT: [[TMP24:%.*]] = shufflevector <8 x float> [[TMP21]], <8 x float> poison, <20 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
33+
; CHECK-NEXT: [[TMP25:%.*]] = shufflevector <20 x float> [[TMP23]], <20 x float> [[TMP24]], <20 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27>
34+
; CHECK-NEXT: br label %[[REGION_30:.*]]
35+
; CHECK: [[REGION_30]]:
36+
; CHECK-NEXT: [[TMP26:%.*]] = phi <20 x float> [ [[TMP25]], %[[LABEL]] ]
37+
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <20 x float> [[TMP26]], i32 7
38+
; CHECK-NEXT: ret float [[TMP27]]
39+
;
40+
label:
41+
%tmp.0.4.vec.extract = extractelement <4 x float> zeroinitializer, i64 0
42+
%tmp.0.0.vec.extract = extractelement <4 x float> zeroinitializer, i64 0
43+
%tmp.12.36.vec.extract = extractelement <4 x float> zeroinitializer, i64 0
44+
%tmp.7.28.vec.extract = extractelement <4 x float> zeroinitializer, i64 0
45+
%tmp.0.12.vec.extract = extractelement <4 x float> zeroinitializer, i64 0
46+
%mul3.i128.i = fmul float 0.000000e+00, 0.000000e+00
47+
%mul3.i138.i = fmul float 0.000000e+00, 0.000000e+00
48+
%sub.i102.i = fsub float 0.000000e+00, 0.000000e+00
49+
%mul.i136.i = fmul float %sub.i102.i, 0.000000e+00
50+
%v.0.4.vec.extract = extractelement <8 x float> zeroinitializer, i64 0
51+
%v.0.24.vec.extract = extractelement <8 x float> zeroinitializer, i64 0
52+
%v.0.28.vec.extract = extractelement <8 x float> zeroinitializer, i64 0
53+
%v.11.48.vec.extract = extractelement <8 x float> zeroinitializer, i64 0
54+
%v.20.72.vec.extract = extractelement <8 x float> zeroinitializer, i64 0
55+
%v.20.76.vec.extract = extractelement <8 x float> zeroinitializer, i64 0
56+
%0 = fmul float %v.0.4.vec.extract, %tmp.0.0.vec.extract
57+
%1 = fadd float %0, 0.000000e+00
58+
%2 = fmul float %v.0.28.vec.extract, 0.000000e+00
59+
%3 = fmul float %v.0.28.vec.extract, %tmp.0.0.vec.extract
60+
%4 = fadd float %3, 0.000000e+00
61+
%5 = fmul float %tmp.0.4.vec.extract, %v.11.48.vec.extract
62+
%6 = fadd float 0.000000e+00, %5
63+
%7 = fmul float %v.20.76.vec.extract, %tmp.0.0.vec.extract
64+
%8 = fadd float %7, 0.000000e+00
65+
%9 = fmul float 0.000000e+00, %tmp.0.12.vec.extract
66+
%10 = fadd float %2, %9
67+
%11 = fadd float %10, 0.000000e+00
68+
%12 = fsub float 0.000000e+00, %11
69+
%13 = fadd float 0.000000e+00, %1
70+
%14 = fadd float 0.000000e+00, %4
71+
%15 = fadd float 0.000000e+00, %6
72+
%16 = fadd float 0.000000e+00, %8
73+
%17 = fmul float 0.000000e+00, 0.000000e+00
74+
%18 = fmul float 0.000000e+00, %tmp.7.28.vec.extract
75+
%19 = fmul float 0.000000e+00, 0.000000e+00
76+
%20 = fmul float 0.000000e+00, 0.000000e+00
77+
%21 = fmul float 0.000000e+00, %tmp.7.28.vec.extract
78+
%22 = fmul float 0.000000e+00, 0.000000e+00
79+
%23 = fmul float 0.000000e+00, %tmp.12.36.vec.extract
80+
%24 = fadd float %18, %23
81+
%25 = fmul float 0.000000e+00, %tmp.12.36.vec.extract
82+
%26 = fadd float %21, %25
83+
%27 = fsub float 0.000000e+00, poison
84+
%28 = fadd float %24, 0.000000e+00
85+
%29 = fsub float 0.000000e+00, %28
86+
%30 = fadd float %26, 0.000000e+00
87+
%31 = fsub float 0.000000e+00, %30
88+
%32 = fadd float 0.000000e+00, %17
89+
%33 = fadd float 0.000000e+00, %19
90+
%34 = fadd float 0.000000e+00, %20
91+
%35 = fadd float 0.000000e+00, %22
92+
%36 = fmul float 0.000000e+00, %mul3.i138.i
93+
%37 = fmul float %v.0.4.vec.extract, %mul.i136.i
94+
%38 = fadd float %37, 0.000000e+00
95+
%39 = fmul float 0.000000e+00, %mul3.i138.i
96+
%40 = fmul float %mul3.i138.i, %v.0.24.vec.extract
97+
%41 = fadd float 0.000000e+00, %40
98+
%42 = fmul float 0.000000e+00, %mul3.i138.i
99+
%43 = fmul float 0.000000e+00, %mul3.i138.i
100+
%44 = fmul float %mul3.i138.i, %v.20.72.vec.extract
101+
%45 = fadd float 0.000000e+00, %44
102+
%46 = fmul float 0.000000e+00, 1.000000e+00
103+
%47 = fmul float 0.000000e+00, %mul3.i128.i
104+
%48 = fadd float %36, %47
105+
%49 = fmul float 0.000000e+00, %mul3.i128.i
106+
%50 = fadd float %39, %49
107+
%51 = fmul float 0.000000e+00, %mul3.i128.i
108+
%52 = fadd float %42, %51
109+
%53 = fmul float 0.000000e+00, %mul3.i128.i
110+
%54 = fadd float %43, %53
111+
%55 = fadd float %46, 0.000000e+00
112+
%56 = fadd float %48, 0.000000e+00
113+
%57 = fsub float %55, %56
114+
%58 = fadd float %50, 0.000000e+00
115+
%59 = fsub float 0.000000e+00, %58
116+
%60 = fadd float %52, 0.000000e+00
117+
%61 = fsub float 0.000000e+00, %60
118+
%62 = fadd float %mul.i136.i, 0.000000e+00
119+
%63 = fadd float %54, 0.000000e+00
120+
%64 = fsub float %62, %63
121+
%65 = fadd float 0.000000e+00, %38
122+
%66 = fadd float 0.000000e+00, %41
123+
%67 = fadd float 0.000000e+00, %45
124+
br label %region.30
125+
126+
region.30:
127+
%68 = phi float [ %29, %label ]
128+
%69 = phi float [ %66, %label ]
129+
%70 = phi float [ %59, %label ]
130+
%71 = phi float [ %15, %label ]
131+
%72 = phi float [ %34, %label ]
132+
%73 = phi float [ poison, %label ]
133+
%74 = phi float [ %61, %label ]
134+
%75 = phi float [ %16, %label ]
135+
%76 = phi float [ %35, %label ]
136+
%77 = phi float [ %31, %label ]
137+
%78 = phi float [ %67, %label ]
138+
%79 = phi float [ %64, %label ]
139+
%80 = phi float [ %33, %label ]
140+
%81 = phi float [ %12, %label ]
141+
%82 = phi float [ %14, %label ]
142+
%83 = phi float [ %57, %label ]
143+
%84 = phi float [ %65, %label ]
144+
%85 = phi float [ %27, %label ]
145+
%86 = phi float [ %32, %label ]
146+
%87 = phi float [ %13, %label ]
147+
ret float %87
148+
}

llvm/test/Transforms/SLPVectorizer/X86/shuffle-mask-emission.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,11 @@ define i1 @test() {
55
; CHECK-LABEL: define i1 @test() {
66
; CHECK-NEXT: [[ENTRY:.*:]]
77
; CHECK-NEXT: [[H_PROMOTED118_I_FR:%.*]] = freeze i32 1
8+
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[H_PROMOTED118_I_FR]], i32 2
9+
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> zeroinitializer, [[TMP0]]
810
; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[H_PROMOTED118_I_FR]], i32 0
911
; CHECK-NEXT: [[TMP4:%.*]] = add <2 x i32> zeroinitializer, [[TMP3]]
1012
; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[TMP4]], <2 x i32> poison, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
11-
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x i32> <i32 0, i32 0, i32 poison, i32 0>, i32 [[H_PROMOTED118_I_FR]], i32 2
12-
; CHECK-NEXT: [[TMP1:%.*]] = add <4 x i32> zeroinitializer, [[TMP0]]
1313
; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP1]], [[TMP2]]
1414
; CHECK-NEXT: [[TMP6:%.*]] = and <4 x i32> [[TMP5]], <i32 0, i32 1, i32 1, i32 1>
1515
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i32> [[TMP6]], <i32 1, i32 0, i32 0, i32 0>

0 commit comments

Comments
 (0)