Skip to content

Commit 4be0a04

Browse files
committed
[VPlan] Make VPInstruction::AnyOf poison-safe. (llvm#154156)
AnyOf reduces multiple input vectors to a single boolean value. When used for early-exit vectorization, we need to consider any lane after the early exit being poison. Any poison lane would result in poison after the AnyOf reduction. To prevent this, freeze all inputs to AnyOf. Fixes llvm#153946. Fixes llvm#155162. https://alive2.llvm.org/ce/z/FD-XxA PR: llvm#154156 (cherry picked from commit f492eb9)
1 parent b18c2ca commit 4be0a04

18 files changed

+667
-100
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -994,7 +994,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
994994
// Returns a scalar boolean value, which is true if any lane of its
995995
// (boolean) vector operands is true. It produces the reduced value across
996996
// all unrolled iterations. Unrolling will add all copies of its original
997-
// operand as additional operands.
997+
// operand as additional operands. AnyOf is poison-safe as all operands
998+
// will be frozen.
998999
AnyOf,
9991000
// Calculates the first active lane index of the vector predicate operands.
10001001
// It produces the lane index across all unrolled iterations. Unrolling will

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -858,9 +858,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
858858
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
859859
}
860860
case VPInstruction::AnyOf: {
861-
Value *Res = State.get(getOperand(0));
861+
Value *Res = Builder.CreateFreeze(State.get(getOperand(0)));
862862
for (VPValue *Op : drop_begin(operands()))
863-
Res = Builder.CreateOr(Res, State.get(Op));
863+
Res = Builder.CreateOr(Res, Builder.CreateFreeze(State.get(Op)));
864864
return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);
865865
}
866866
case VPInstruction::FirstActiveLane: {

llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ define float @fmaxnum(ptr %src, i64 %n) {
6363
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6464
; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
6565
; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
66-
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]]
66+
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP3]]
67+
; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP4]]
68+
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP18]], [[TMP15]]
6769
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
6870
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0
6971
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ define float @fminnum(ptr %src, i64 %n) {
6363
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6464
; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
6565
; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
66-
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]]
66+
; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP3]]
67+
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]]
68+
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]]
6769
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
6870
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0
6971
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
3737
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP14]], align 1
3838
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
3939
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP5]]
40-
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP16]])
40+
; CHECK-NEXT: [[TMP8:%.*]] = freeze <vscale x 16 x i1> [[TMP16]]
41+
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP8]])
4142
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
4243
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
4344
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -118,7 +119,8 @@ define i64 @same_exit_block_pre_inc_use4() {
118119
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP2]], align 1
119120
; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <2 x i64> [[VEC_IND]], [[WIDE_LOAD]]
120121
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 2
121-
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]])
122+
; CHECK-NEXT: [[TMP3:%.*]] = freeze <2 x i1> [[TMP4]]
123+
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP3]])
122124
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
123125
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
124126
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
@@ -194,7 +196,8 @@ define i64 @loop_contains_safe_call() #1 {
194196
; CHECK-NEXT: [[TMP3:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD]])
195197
; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast oge <4 x float> [[TMP3]], splat (float 3.000000e+00)
196198
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4
197-
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
199+
; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP5]]
200+
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
198201
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
199202
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
200203
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -281,7 +284,8 @@ define i64 @loop_contains_safe_div() #1 {
281284
; CHECK-NEXT: [[TMP13:%.*]] = udiv <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 20000)
282285
; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <vscale x 4 x i32> [[TMP13]], splat (i32 1)
283286
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX2]], [[TMP5]]
284-
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]])
287+
; CHECK-NEXT: [[TMP9:%.*]] = freeze <vscale x 4 x i1> [[TMP15]]
288+
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
285289
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[INDEX1]]
286290
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
287291
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -363,7 +367,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
363367
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i64, ptr [[TMP4]], i32 0
364368
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP5]], align 8
365369
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
366-
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
370+
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP6]]
371+
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
367372
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
368373
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
369374
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -464,7 +469,8 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) {
464469
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP13]], align 4
465470
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
466471
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
467-
; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]])
472+
; CHECK-NEXT: [[TMP20:%.*]] = freeze <4 x i1> [[TMP14]]
473+
; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP20]])
468474
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
469475
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
470476
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll

Lines changed: 12 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,25 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
2929
; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT3:%.*]], [[LOOP]] ]
3030
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 3, [[INDEX1]]
3131
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[OFFSET_IDX]]
32-
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
33-
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP8]], align 1
34-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
35-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[TMP9]], i32 0
36-
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 16 x i8>, ptr [[TMP10]], align 1
37-
; CHECK-NEXT: [[TMP32:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD4]], [[WIDE_LOAD8]]
32+
; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr [[TMP7]], i32 0
33+
; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <vscale x 16 x i8>, ptr [[TMP28]], align 1
34+
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[OFFSET_IDX]]
35+
; CHECK-NEXT: [[TMP26:%.*]] = getelementptr inbounds i8, ptr [[TMP29]], i32 0
36+
; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <vscale x 16 x i8>, ptr [[TMP26]], align 1
37+
; CHECK-NEXT: [[TMP59:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD4]], [[WIDE_LOAD8]]
3838
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP5]]
39-
; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP32]])
40-
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
41-
; CHECK-NEXT: [[TMP14:%.*]] = or i1 [[TMP12]], [[TMP13]]
42-
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
39+
; CHECK-NEXT: [[TMP34:%.*]] = freeze <vscale x 16 x i1> [[TMP59]]
40+
; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP34]])
41+
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
42+
; CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP12]], [[TMP35]]
43+
; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_SPLIT:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
4344
; CHECK: middle.split:
4445
; CHECK-NEXT: br i1 [[TMP12]], label [[VECTOR_EARLY_EXIT:%.*]], label [[LOOP_INC:%.*]]
4546
; CHECK: middle.block:
4647
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 510, [[N_VEC]]
4748
; CHECK-NEXT: br i1 [[CMP_N]], label [[LOOP_END:%.*]], label [[SCALAR_PH]]
4849
; CHECK: vector.early.exit:
49-
; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP32]], i1 true)
50+
; CHECK-NEXT: [[TMP61:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1(<vscale x 16 x i1> [[TMP59]], i1 true)
5051
; CHECK-NEXT: [[TMP16:%.*]] = add i64 [[INDEX1]], [[TMP61]]
5152
; CHECK-NEXT: [[TMP17:%.*]] = add i64 3, [[TMP16]]
5253
; CHECK-NEXT: br label [[LOOP_END]]

llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,9 @@ define float @fmaxnum(ptr %src, i64 %n) {
6363
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6464
; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
6565
; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
66-
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]]
66+
; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP3]]
67+
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]]
68+
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]]
6769
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
6870
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0
6971
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer

llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,8 @@ define float @fmaxnum_1(ptr %src, i64 %n) {
208208
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
209209
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
210210
; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
211-
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
211+
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]]
212+
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
212213
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0
213214
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
214215
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]]
@@ -276,7 +277,8 @@ define float @fmaxnum_2(ptr %src, i64 %n) {
276277
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
277278
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
278279
; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
279-
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
280+
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]]
281+
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
280282
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0
281283
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
282284
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]]
@@ -346,7 +348,8 @@ define float @fmaxnum_induction_starts_at_10(ptr %src, i64 %n) {
346348
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
347349
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
348350
; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
349-
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
351+
; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP5]]
352+
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]])
350353
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0
351354
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
352355
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]]
@@ -417,7 +420,8 @@ define float @fmaxnum_induction_starts_at_value(ptr %src, i64 %start, i64 %n) {
417420
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
418421
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
419422
; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
420-
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
423+
; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP5]]
424+
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]])
421425
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0
422426
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
423427
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]]

llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -208,7 +208,8 @@ define float @fminnum_1(ptr %src, i64 %n) {
208208
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
209209
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
210210
; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
211-
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
211+
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]]
212+
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
212213
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0
213214
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
214215
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]]
@@ -276,7 +277,8 @@ define float @fminnum_2(ptr %src, i64 %n) {
276277
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
277278
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
278279
; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
279-
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
280+
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]]
281+
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
280282
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0
281283
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
282284
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]]

0 commit comments

Comments
 (0)