Skip to content

Commit e4bb52b

Browse files
committed
!Fixup, implement Cmp instruction costs in VPInstruction::computeCost.
1 parent 8e04ed6 commit e4bb52b

File tree

9 files changed

+125
-336
lines changed

9 files changed

+125
-336
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1182,6 +1182,20 @@ InstructionCost VPInstruction::computeCost(ElementCount VF,
11821182
return Ctx.TTI.getArithmeticInstrCost(Instruction::Xor, RetTy,
11831183
Ctx.CostKind);
11841184
}
1185+
case Instruction::ICmp:
1186+
case Instruction::FCmp: {
1187+
Instruction *CtxI = dyn_cast_or_null<Instruction>(getUnderlyingValue());
1188+
Type *SrcTy = Ctx.Types.inferScalarType(getOperand(0));
1189+
Type *RetTy = Ctx.Types.inferScalarType(this);
1190+
if (!vputils::onlyFirstLaneUsed(this)) {
1191+
SrcTy = toVectorTy(SrcTy, VF);
1192+
RetTy = toVectorTy(RetTy, VF);
1193+
}
1194+
return Ctx.TTI.getCmpSelInstrCost(Opcode, SrcTy, RetTy, getPredicate(),
1195+
Ctx.CostKind,
1196+
{TTI::OK_AnyValue, TTI::OP_None},
1197+
{TTI::OK_AnyValue, TTI::OP_None}, CtxI);
1198+
}
11851199
case VPInstruction::ExtractLastElement: {
11861200
// Add on the cost of extracting the element.
11871201
auto *VecTy = toVectorTy(Ctx.Types.inferScalarType(getOperand(0)), VF);

llvm/test/Transforms/LoopVectorize/AArch64/early_exit_costs.ll

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -92,11 +92,11 @@ loop.end:
9292

9393
define i64 @vectorization_not_profitable_due_to_trunc(ptr dereferenceable(800) %src) {
9494
; CHECK-LABEL: LV: Checking a loop in 'vectorization_not_profitable_due_to_trunc'
95-
; CHECK: Calculating cost of work in exit block vector.early.exit:
96-
; CHECK-NEXT: Cost of 6 for VF 2: EMIT vp<{{.*}}> = first-active-lane ir<{{.*}}>
97-
; CHECK-NEXT: Cost of 2 for VF 2: EMIT vp<{{.*}}> = extract-lane vp<{{.*}}>, ir<{{.*}}>
98-
; CHECK: LV: Minimum required TC for runtime checks to be profitable:28
99-
; CHECK: LV: Found a vectorizable loop (2)
95+
; CHECK: LV: Selecting VF: 1.
96+
; CHECK-NEXT: Calculating cost of work in exit block vector.early.exit:
97+
; CHECK-NEXT: Cost of 1 for VF 1: EMIT vp<%first.active.lane> = first-active-lane ir<%t>
98+
; CHECK-NEXT: Cost of 0 for VF 1: EMIT vp<%early.exit.value> = extract-lane vp<%first.active.lane>, ir<%l>
99+
; CHECK-NEXT: LV: Vectorization is possible but not beneficial.
100100
entry:
101101
br label %loop.header
102102

llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll

Lines changed: 48 additions & 124 deletions
Original file line number | Diff line number | Diff line change
@@ -49,48 +49,23 @@ bb3:
4949
define void @redundant_or_1(ptr %dst, i1 %c.0, i1 %c.1) {
5050
; CHECK-LABEL: @redundant_or_1(
5151
; CHECK-NEXT: entry:
52-
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
53-
; CHECK: vector.ph:
54-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0
55-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
56-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0
57-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer
58-
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
59-
; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer
60-
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
61-
; CHECK: vector.body:
62-
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
63-
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
64-
; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
65-
; CHECK: pred.store.if:
66-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
67-
; CHECK-NEXT: store i32 0, ptr [[TMP8]], align 4
68-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
69-
; CHECK: pred.store.continue:
70-
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
71-
; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
72-
; CHECK: pred.store.if3:
73-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1
74-
; CHECK-NEXT: store i32 0, ptr [[TMP11]], align 4
75-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
76-
; CHECK: pred.store.continue4:
77-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
78-
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
79-
; CHECK: pred.store.if5:
80-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2
52+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE:%.*]]
53+
; CHECK: loop.header:
54+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[PRED_STORE_IF3:%.*]] ]
55+
; CHECK-NEXT: br i1 [[TMP9:%.*]], label [[PRED_STORE_IF3]], label [[PRED_STORE_CONTINUE4:%.*]]
56+
; CHECK: then.1:
57+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2
58+
; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], true
59+
; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1:%.*]], i1 false
60+
; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[PRED_STORE_IF3]]
61+
; CHECK: then.2:
62+
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[IV]]
8163
; CHECK-NEXT: store i32 0, ptr [[TMP14]], align 4
82-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
83-
; CHECK: pred.store.continue6:
84-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
85-
; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
86-
; CHECK: pred.store.if7:
87-
; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3
88-
; CHECK-NEXT: store i32 0, ptr [[TMP17]], align 4
89-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
90-
; CHECK: pred.store.continue8:
91-
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
92-
; CHECK: middle.block:
93-
; CHECK-NEXT: br label [[LOOP_LATCH:%.*]]
64+
; CHECK-NEXT: br label [[PRED_STORE_IF3]]
65+
; CHECK: loop.latch:
66+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
67+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3
68+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[PRED_STORE_CONTINUE]]
9469
; CHECK: exit:
9570
; CHECK-NEXT: ret void
9671
;
@@ -124,48 +99,23 @@ exit:
12499
define void @redundant_or_2(ptr %dst, i1 %c.0, i1 %c.1) {
125100
; CHECK-LABEL: @redundant_or_2(
126101
; CHECK-NEXT: entry:
127-
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
128-
; CHECK: vector.ph:
129-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0
130-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
131-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0
132-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer
133-
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true)
134-
; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[BROADCAST_SPLAT]], <4 x i1> zeroinitializer
135-
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
136-
; CHECK: vector.body:
137-
; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i1> [[TMP0]], <4 x i1> zeroinitializer
138-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
139-
; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
140-
; CHECK: pred.store.if:
141-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
142-
; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4
143-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
144-
; CHECK: pred.store.continue:
145-
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
146-
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
147-
; CHECK: pred.store.if3:
148-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1
149-
; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4
150-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
151-
; CHECK: pred.store.continue4:
152-
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
153-
; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
154-
; CHECK: pred.store.if5:
155-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2
102+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE:%.*]]
103+
; CHECK: loop.header:
104+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[PRED_STORE_IF3:%.*]] ]
105+
; CHECK-NEXT: br i1 [[TMP8:%.*]], label [[PRED_STORE_IF3]], label [[PRED_STORE_CONTINUE4:%.*]]
106+
; CHECK: then.1:
107+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2
108+
; CHECK-NEXT: [[OR:%.*]] = or i1 true, [[CMP]]
109+
; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1:%.*]], i1 false
110+
; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[PRED_STORE_IF3]]
111+
; CHECK: then.2:
112+
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[IV]]
156113
; CHECK-NEXT: store i32 0, ptr [[TMP13]], align 4
157-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
158-
; CHECK: pred.store.continue6:
159-
; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
160-
; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
161-
; CHECK: pred.store.if7:
162-
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3
163-
; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4
164-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
165-
; CHECK: pred.store.continue8:
166-
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
167-
; CHECK: middle.block:
168-
; CHECK-NEXT: br label [[LOOP_LATCH:%.*]]
114+
; CHECK-NEXT: br label [[PRED_STORE_IF3]]
115+
; CHECK: loop.latch:
116+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
117+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3
118+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[PRED_STORE_CONTINUE]]
169119
; CHECK: exit:
170120
; CHECK-NEXT: ret void
171121
;
@@ -199,49 +149,23 @@ exit:
199149
define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) {
200150
; CHECK-LABEL: @redundant_and_1(
201151
; CHECK-NEXT: entry:
202-
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
203-
; CHECK: vector.ph:
204-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0
205-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
206-
; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
207-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0
208-
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer
209-
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
210-
; CHECK: vector.body:
211-
; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer
212-
; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP0]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer
213-
; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i1> [[TMP2]], <4 x i1> zeroinitializer
214-
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
215-
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
216-
; CHECK: pred.store.if:
217-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0
218-
; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 4
219-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
220-
; CHECK: pred.store.continue:
221-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
222-
; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
223-
; CHECK: pred.store.if3:
224-
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1
225-
; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 4
226-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
227-
; CHECK: pred.store.continue4:
228-
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
229-
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]]
230-
; CHECK: pred.store.if5:
231-
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2
152+
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE:%.*]]
153+
; CHECK: loop.header:
154+
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[PRED_STORE_IF3:%.*]] ]
155+
; CHECK-NEXT: br i1 [[TMP10:%.*]], label [[PRED_STORE_IF3]], label [[PRED_STORE_CONTINUE4:%.*]]
156+
; CHECK: then.1:
157+
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2
158+
; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], false
159+
; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1:%.*]], i1 false
160+
; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[PRED_STORE_IF3]]
161+
; CHECK: then.2:
162+
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 [[IV]]
232163
; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4
233-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
234-
; CHECK: pred.store.continue6:
235-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
236-
; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]]
237-
; CHECK: pred.store.if7:
238-
; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3
239-
; CHECK-NEXT: store i32 0, ptr [[TMP18]], align 4
240-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]]
241-
; CHECK: pred.store.continue8:
242-
; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]]
243-
; CHECK: middle.block:
244-
; CHECK-NEXT: br label [[LOOP_LATCH:%.*]]
164+
; CHECK-NEXT: br label [[PRED_STORE_IF3]]
165+
; CHECK: loop.latch:
166+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1
167+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3
168+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[PRED_STORE_CONTINUE]]
245169
; CHECK: exit:
246170
; CHECK-NEXT: ret void
247171
;

llvm/test/Transforms/LoopVectorize/X86/fixed-order-recurrence.ll

Lines changed: 8 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -435,67 +435,16 @@ define void @test_first_order_recurrence_tried_to_scalarized(ptr %dst, i1 %c, i3
435435
; CHECK-LABEL: @test_first_order_recurrence_tried_to_scalarized(
436436
; CHECK-NEXT: entry:
437437
; CHECK-NEXT: [[N:%.*]] = select i1 [[C:%.*]], i32 8, i32 9
438-
; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
439-
; CHECK: vector.ph:
440-
; CHECK-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], 3
441-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
442-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
443-
; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1
444-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
445-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
446438
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
447-
; CHECK: vector.body:
448-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ]
449-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ]
450-
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND]], [[PRED_STORE_CONTINUE6]] ]
451-
; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
452-
; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
453-
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
454-
; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
455-
; CHECK: pred.store.if:
456-
; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0
457-
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds nuw i32, ptr [[DST:%.*]], i32 [[TMP3]]
458-
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i32> [[TMP0]], i32 0
459-
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw i32 10, [[TMP5]]
460-
; CHECK-NEXT: store i32 [[TMP6]], ptr [[TMP4]], align 4
461-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]]
462-
; CHECK: pred.store.continue:
463-
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
464-
; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2:%.*]]
465-
; CHECK: pred.store.if1:
466-
; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[INDEX]], 1
467-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[TMP8]]
468-
; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i32> [[TMP0]], i32 1
469-
; CHECK-NEXT: [[TMP11:%.*]] = sub nsw i32 10, [[TMP10]]
470-
; CHECK-NEXT: store i32 [[TMP11]], ptr [[TMP9]], align 4
471-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE2]]
472-
; CHECK: pred.store.continue2:
473-
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i32 2
474-
; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]]
475-
; CHECK: pred.store.if3:
476-
; CHECK-NEXT: [[TMP13:%.*]] = add i32 [[INDEX]], 2
477-
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[TMP13]]
478-
; CHECK-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
439+
; CHECK: loop:
440+
; CHECK-NEXT: [[TMP18:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
441+
; CHECK-NEXT: [[TMP15:%.*]] = phi i32 [ 4, [[ENTRY]] ], [ [[TMP18]], [[VECTOR_BODY]] ]
442+
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[TMP18]], 1
479443
; CHECK-NEXT: [[TMP16:%.*]] = sub nsw i32 10, [[TMP15]]
480-
; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP14]], align 4
481-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]]
482-
; CHECK: pred.store.continue4:
483-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i32 3
484-
; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6]]
485-
; CHECK: pred.store.if5:
486-
; CHECK-NEXT: [[TMP18:%.*]] = add i32 [[INDEX]], 3
487-
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i32, ptr [[DST]], i32 [[TMP18]]
488-
; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
489-
; CHECK-NEXT: [[TMP21:%.*]] = sub nsw i32 10, [[TMP20]]
490-
; CHECK-NEXT: store i32 [[TMP21]], ptr [[TMP19]], align 4
491-
; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]]
492-
; CHECK: pred.store.continue6:
493-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
494-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
495-
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
496-
; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
497-
; CHECK: middle.block:
498-
; CHECK-NEXT: br label [[LOOP:%.*]]
444+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds nuw i32, ptr [[DST:%.*]], i32 [[TMP18]]
445+
; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP19]], align 4
446+
; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]]
447+
; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
499448
; CHECK: exit:
500449
; CHECK-NEXT: ret void
501450
;

0 commit comments

Comments
 (0)