Skip to content

Commit f492eb9

Browse files
authored
[VPlan] Make VPInstruction::AnyOf poison-safe. (#154156)
AnyOf reduces multiple input vectors to a single boolean value. When used for early-exit vectorization, we need to consider any lane after the early exit being poison. Any poison lane would result in poison after the AnyOf reduction. To prevent this, freeze all inputs to AnyOf. Fixes #153946. Fixes #155162. https://alive2.llvm.org/ce/z/FD-XxA PR: #154156
1 parent d12829d commit f492eb9

18 files changed

+213
-99
lines changed

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1014,7 +1014,8 @@ class LLVM_ABI_FOR_TEST VPInstruction : public VPRecipeWithIRFlags,
10141014
// Returns a scalar boolean value, which is true if any lane of its
10151015
// (boolean) vector operands is true. It produces the reduced value across
10161016
// all unrolled iterations. Unrolling will add all copies of its original
1017-
// operand as additional operands.
1017+
// operand as additional operands. AnyOf is poison-safe as all operands
1018+
// will be frozen.
10181019
AnyOf,
10191020
// Calculates the first active lane index of the vector predicate operands.
10201021
// It produces the lane index across all unrolled iterations. Unrolling will

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -875,9 +875,9 @@ Value *VPInstruction::generate(VPTransformState &State) {
875875
return Builder.CreatePtrAdd(Ptr, Addend, Name, getGEPNoWrapFlags());
876876
}
877877
case VPInstruction::AnyOf: {
878-
Value *Res = State.get(getOperand(0));
878+
Value *Res = Builder.CreateFreeze(State.get(getOperand(0)));
879879
for (VPValue *Op : drop_begin(operands()))
880-
Res = Builder.CreateOr(Res, State.get(Op));
880+
Res = Builder.CreateOr(Res, Builder.CreateFreeze(State.get(Op)));
881881
return State.VF.isScalar() ? Res : Builder.CreateOrReduce(Res);
882882
}
883883
case VPInstruction::ExtractLane: {

llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@ define float @fmaxnum(ptr %src, i64 %n) {
6262
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6363
; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
6464
; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
65-
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]]
65+
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP3]]
66+
; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP4]]
67+
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP18]], [[TMP15]]
6668
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
6769
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0
6870
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@ define float @fminnum(ptr %src, i64 %n) {
6262
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6363
; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
6464
; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
65-
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]]
65+
; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP3]]
66+
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]]
67+
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]]
6668
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
6769
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0
6870
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer

llvm/test/Transforms/LoopVectorize/AArch64/simple_early_exit.ll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ define i64 @same_exit_block_pre_inc_use1() #1 {
3333
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <vscale x 16 x i8>, ptr [[TMP13]], align 1
3434
; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD]], [[WIDE_LOAD2]]
3535
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP3]]
36-
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP16]])
36+
; CHECK-NEXT: [[TMP8:%.*]] = freeze <vscale x 16 x i1> [[TMP16]]
37+
; CHECK-NEXT: [[TMP17:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP8]])
3738
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
3839
; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
3940
; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -113,7 +114,8 @@ define i64 @same_exit_block_pre_inc_use4() {
113114
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP1]], align 1
114115
; CHECK-NEXT: [[TMP4:%.*]] = icmp uge <2 x i64> [[VEC_IND]], [[WIDE_LOAD]]
115116
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 2
116-
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP4]])
117+
; CHECK-NEXT: [[TMP2:%.*]] = freeze <2 x i1> [[TMP4]]
118+
; CHECK-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> [[TMP2]])
117119
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
118120
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
119121
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
@@ -187,7 +189,8 @@ define i64 @loop_contains_safe_call() #1 {
187189
; CHECK-NEXT: [[TMP3:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[WIDE_LOAD]])
188190
; CHECK-NEXT: [[TMP5:%.*]] = fcmp fast oge <4 x float> [[TMP3]], splat (float 3.000000e+00)
189191
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4
190-
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
192+
; CHECK-NEXT: [[TMP4:%.*]] = freeze <4 x i1> [[TMP5]]
193+
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP4]])
191194
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 64
192195
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
193196
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
@@ -270,7 +273,8 @@ define i64 @loop_contains_safe_div() #1 {
270273
; CHECK-NEXT: [[TMP13:%.*]] = udiv <vscale x 4 x i32> [[WIDE_LOAD]], splat (i32 20000)
271274
; CHECK-NEXT: [[TMP15:%.*]] = icmp ne <vscale x 4 x i32> [[TMP13]], splat (i32 1)
272275
; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX2]], [[TMP3]]
273-
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP15]])
276+
; CHECK-NEXT: [[TMP9:%.*]] = freeze <vscale x 4 x i1> [[TMP15]]
277+
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.nxv4i1(<vscale x 4 x i1> [[TMP9]])
274278
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[INDEX1]]
275279
; CHECK-NEXT: [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
276280
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
@@ -350,7 +354,8 @@ define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(
350354
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i64, ptr [[P2]], i64 [[OFFSET_IDX]]
351355
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i64>, ptr [[TMP4]], align 8
352356
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], 4
353-
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
357+
; CHECK-NEXT: [[TMP3:%.*]] = freeze <4 x i1> [[TMP6]]
358+
; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP3]])
354359
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT3]], 64
355360
; CHECK-NEXT: [[TMP9:%.*]] = or i1 [[TMP7]], [[TMP8]]
356361
; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
@@ -448,7 +453,8 @@ define i32 @diff_exit_block_needs_scev_check(i32 %end) {
448453
; CHECK-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP12]], align 4
449454
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[WIDE_LOAD2]]
450455
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
451-
; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]])
456+
; CHECK-NEXT: [[TMP13:%.*]] = freeze <4 x i1> [[TMP14]]
457+
; CHECK-NEXT: [[TMP15:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP13]])
452458
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
453459
; CHECK-NEXT: [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
454460
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_SPLIT:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/single-early-exit-interleave.ll

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,13 @@ define i64 @same_exit_block_pre_inc_use1() #0 {
6060
; CHECK-NEXT: [[TMP31:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD3]], [[WIDE_LOAD7]]
6161
; CHECK-NEXT: [[TMP59:%.*]] = icmp ne <vscale x 16 x i8> [[WIDE_LOAD4]], [[WIDE_LOAD8]]
6262
; CHECK-NEXT: [[INDEX_NEXT3]] = add nuw i64 [[INDEX1]], [[TMP3]]
63-
; CHECK-NEXT: [[TMP34:%.*]] = or <vscale x 16 x i1> [[TMP32]], [[TMP30]]
64-
; CHECK-NEXT: [[TMP37:%.*]] = or <vscale x 16 x i1> [[TMP34]], [[TMP31]]
65-
; CHECK-NEXT: [[TMP33:%.*]] = or <vscale x 16 x i1> [[TMP37]], [[TMP59]]
63+
; CHECK-NEXT: [[TMP37:%.*]] = freeze <vscale x 16 x i1> [[TMP32]]
64+
; CHECK-NEXT: [[TMP38:%.*]] = freeze <vscale x 16 x i1> [[TMP30]]
65+
; CHECK-NEXT: [[TMP54:%.*]] = or <vscale x 16 x i1> [[TMP37]], [[TMP38]]
66+
; CHECK-NEXT: [[TMP60:%.*]] = freeze <vscale x 16 x i1> [[TMP31]]
67+
; CHECK-NEXT: [[TMP62:%.*]] = or <vscale x 16 x i1> [[TMP54]], [[TMP60]]
68+
; CHECK-NEXT: [[TMP34:%.*]] = freeze <vscale x 16 x i1> [[TMP59]]
69+
; CHECK-NEXT: [[TMP33:%.*]] = or <vscale x 16 x i1> [[TMP62]], [[TMP34]]
6670
; CHECK-NEXT: [[TMP12:%.*]] = call i1 @llvm.vector.reduce.or.nxv16i1(<vscale x 16 x i1> [[TMP33]])
6771
; CHECK-NEXT: [[TMP35:%.*]] = icmp eq i64 [[INDEX_NEXT3]], [[N_VEC]]
6872
; CHECK-NEXT: [[TMP36:%.*]] = or i1 [[TMP12]], [[TMP35]]

llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,9 @@ define float @fmaxnum(ptr %src, i64 %n) {
6262
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6363
; CHECK-NEXT: [[TMP3:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
6464
; CHECK-NEXT: [[TMP4:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD2]], [[WIDE_LOAD2]]
65-
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP3]], [[TMP4]]
65+
; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP3]]
66+
; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]]
67+
; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]]
6668
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
6769
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0
6870
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer

llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,8 @@ define float @fmaxnum_1(ptr %src, i64 %n) {
207207
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
208208
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
209209
; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
210-
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
210+
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]]
211+
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
211212
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0
212213
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
213214
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]]
@@ -273,7 +274,8 @@ define float @fmaxnum_2(ptr %src, i64 %n) {
273274
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
274275
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
275276
; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
276-
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
277+
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]]
278+
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
277279
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0
278280
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
279281
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]]
@@ -341,7 +343,8 @@ define float @fmaxnum_induction_starts_at_10(ptr %src, i64 %n) {
341343
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
342344
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
343345
; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
344-
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
346+
; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP5]]
347+
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]])
345348
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0
346349
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
347350
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]]
@@ -410,7 +413,8 @@ define float @fmaxnum_induction_starts_at_value(ptr %src, i64 %start, i64 %n) {
410413
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
411414
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
412415
; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
413-
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]])
416+
; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP5]]
417+
; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]])
414418
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0
415419
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
416420
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]]

llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,8 @@ define float @fminnum_1(ptr %src, i64 %n) {
207207
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
208208
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
209209
; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
210-
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
210+
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]]
211+
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
211212
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0
212213
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
213214
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]]
@@ -273,7 +274,8 @@ define float @fminnum_2(ptr %src, i64 %n) {
273274
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[IV]], 4
274275
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
275276
; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]]
276-
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP2]])
277+
; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]]
278+
; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]])
277279
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0
278280
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
279281
; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]]

llvm/test/Transforms/LoopVectorize/single-early-exit-cond-poison.ll

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
; Test case from https://github.com/llvm/llvm-project/issues/153946.
66
; %shr and thus %early.cond will be poison from %iv == 4 onwards.
7-
; TODO: Make sure the mask being poison does not propagate across lanes in the
7+
; Make sure the mask being poison does not propagate across lanes in the
88
; OR reduction when computing the early exit condition in the vector loop.
99
define noundef i32 @f(i32 noundef %g) {
1010
; VF4IC2-LABEL: define noundef i32 @f(
@@ -20,7 +20,9 @@ define noundef i32 @f(i32 noundef %g) {
2020
; VF4IC2-NEXT: [[TMP3:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], <i32 32, i32 40, i32 48, i32 56>
2121
; VF4IC2-NEXT: [[TMP4:%.*]] = icmp ne <4 x i32> [[TMP2]], zeroinitializer
2222
; VF4IC2-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP3]], zeroinitializer
23-
; VF4IC2-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP4]], [[TMP5]]
23+
; VF4IC2-NEXT: [[TMP17:%.*]] = freeze <4 x i1> [[TMP4]]
24+
; VF4IC2-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP5]]
25+
; VF4IC2-NEXT: [[TMP6:%.*]] = or <4 x i1> [[TMP17]], [[TMP18]]
2426
; VF4IC2-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP6]])
2527
; VF4IC2-NEXT: br label %[[MIDDLE_SPLIT:.*]]
2628
; VF4IC2: [[MIDDLE_SPLIT]]:
@@ -65,7 +67,8 @@ define noundef i32 @f(i32 noundef %g) {
6567
; VF8IC1: [[VECTOR_BODY]]:
6668
; VF8IC1-NEXT: [[TMP1:%.*]] = ashr <8 x i32> [[BROADCAST_SPLAT]], <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56>
6769
; VF8IC1-NEXT: [[TMP2:%.*]] = icmp ne <8 x i32> [[TMP1]], zeroinitializer
68-
; VF8IC1-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP2]])
70+
; VF8IC1-NEXT: [[TMP8:%.*]] = freeze <8 x i1> [[TMP2]]
71+
; VF8IC1-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP8]])
6972
; VF8IC1-NEXT: br label %[[MIDDLE_SPLIT:.*]]
7073
; VF8IC1: [[MIDDLE_SPLIT]]:
7174
; VF8IC1-NEXT: [[TMP4:%.*]] = extractelement <8 x i32> [[TMP1]], i32 7

0 commit comments

Comments
 (0)