Skip to content

Commit e481816

Browse files
committed
[VPlan] Allow folding not (cmp eq) -> icmp ne with other select users
Currently we only allow folding not (cmp eq) -> icmp ne if the not is the only user of the compare. However a common scenario is that some select might also use the compare. We can still fold the not if we also swizzle the arms of the selects. This helps avoid regressions in llvm#150368
1 parent 2dc0a5f commit e481816

10 files changed

+211
-232
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 21 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1107,13 +1107,29 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) {
11071107
return Def->replaceAllUsesWith(A);
11081108

11091109
// Try to fold Not into compares by adjusting the predicate in-place.
1110-
if (isa<VPWidenRecipe>(A) && A->getNumUsers() == 1) {
1110+
auto CanFold = [&A](VPUser *U) {
1111+
return match(
1112+
U, m_CombineOr(m_Not(m_Specific(A)),
1113+
m_Select(m_Specific(A), m_VPValue(), m_VPValue())));
1114+
};
1115+
if (isa<VPWidenRecipe>(A) && all_of(A->users(), CanFold)) {
11111116
auto *WideCmp = cast<VPWidenRecipe>(A);
11121117
if (WideCmp->getOpcode() == Instruction::ICmp ||
11131118
WideCmp->getOpcode() == Instruction::FCmp) {
11141119
WideCmp->setPredicate(
11151120
CmpInst::getInversePredicate(WideCmp->getPredicate()));
1116-
Def->replaceAllUsesWith(WideCmp);
1121+
for (VPUser *U : WideCmp->users()) {
1122+
auto *R = cast<VPSingleDefRecipe>(U);
1123+
// not (icmp eq) -> icmp ne
1124+
if (match(R, m_Not(m_Specific(WideCmp))))
1125+
R->replaceAllUsesWith(WideCmp);
1126+
// select (icmp eq), x, y -> select (icmp ne), y, x
1127+
else if (match(R, m_Select(m_Specific(WideCmp), m_VPValue(X),
1128+
m_VPValue(Y)))) {
1129+
R->setOperand(1, Y);
1130+
R->setOperand(2, X);
1131+
}
1132+
}
11171133
// If WideCmp doesn't have a debug location, use the one from the
11181134
// negation, to preserve the location.
11191135
if (!WideCmp->getDebugLoc() && R.getDebugLoc())
@@ -1885,7 +1901,6 @@ void VPlanTransforms::truncateToMinimalBitwidths(
18851901
PH->appendRecipe(NewOp);
18861902
}
18871903
}
1888-
18891904
}
18901905
}
18911906
}
@@ -2654,8 +2669,9 @@ void VPlanTransforms::createInterleaveGroups(
26542669
ReversePtr->insertBefore(InsertPos);
26552670
Addr = ReversePtr;
26562671
}
2657-
auto *VPIG = new VPInterleaveRecipe(IG, Addr, StoredValues,
2658-
InsertPos->getMask(), NeedsMaskForGaps, InsertPos->getDebugLoc());
2672+
auto *VPIG =
2673+
new VPInterleaveRecipe(IG, Addr, StoredValues, InsertPos->getMask(),
2674+
NeedsMaskForGaps, InsertPos->getDebugLoc());
26592675
VPIG->insertBefore(InsertPos);
26602676

26612677
unsigned J = 0;

llvm/test/Transforms/LoopVectorize/X86/drop-poison-generating-flags.ll

Lines changed: 28 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,11 @@ define void @drop_scalar_nuw_nsw(ptr noalias nocapture readonly %input, ptr %out
2929
; CHECK: [[VECTOR_BODY]]:
3030
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
3131
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
32-
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
33-
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
32+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
3433
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1
3534
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]]
36-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0:![0-9]+]]
37-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
35+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison), !invariant.load [[META0:![0-9]+]]
36+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer
3837
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]]
3938
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4
4039
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -79,12 +78,11 @@ define void @drop_scalar_gep_nusw(ptr noalias nocapture readonly %input, ptr %ou
7978
; CHECK: [[VECTOR_BODY]]:
8079
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
8180
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
82-
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
83-
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
81+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
8482
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1
8583
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]]
86-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]]
87-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
84+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison), !invariant.load [[META0]]
85+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer
8886
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw float, ptr [[OUTPUT]], i64 [[INDEX]]
8987
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4
9088
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -129,12 +127,11 @@ define void @drop_scalar_gep_nuw(ptr noalias nocapture readonly %input, ptr %out
129127
; CHECK: [[VECTOR_BODY]]:
130128
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
131129
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
132-
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
133-
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
130+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
134131
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[INDEX]], 1
135132
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP2]]
136-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]]
137-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
133+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP3]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison), !invariant.load [[META0]]
134+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer
138135
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nuw float, ptr [[OUTPUT]], i64 [[INDEX]]
139136
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4
140137
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -182,10 +179,9 @@ define void @drop_nonpred_scalar_nuw_nsw(ptr noalias nocapture readonly %input,
182179
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
183180
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[INDEX]], 1
184181
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, ptr [[INPUT]], i64 [[TMP0]]
185-
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
186-
; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true)
187-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP1]], i32 4, <4 x i1> [[TMP3]], <4 x float> poison), !invariant.load [[META0]]
188-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
182+
; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
183+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP1]], i32 4, <4 x i1> [[TMP2]], <4 x float> poison), !invariant.load [[META0]]
184+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer
189185
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]]
190186
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4
191187
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -230,13 +226,12 @@ define void @preserve_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr
230226
; CHECK: [[VECTOR_BODY]]:
231227
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
232228
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
233-
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
234-
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
229+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
235230
; CHECK-NEXT: [[TMP2:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], splat (i64 1)
236231
; CHECK-NEXT: [[TMP3:%.*]] = mul nuw nsw <4 x i64> [[TMP2]], splat (i64 2)
237232
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, ptr [[INPUT]], <4 x i64> [[TMP3]]
238-
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> [[TMP1]], <4 x float> poison), !invariant.load [[META0]]
239-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_GATHER]]
233+
; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> [[TMP4]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison), !invariant.load [[META0]]
234+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[WIDE_MASKED_GATHER]], <4 x float> zeroinitializer
240235
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]]
241236
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP5]], align 4
242237
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -282,15 +277,14 @@ define void @drop_vector_nuw_nsw(ptr noalias nocapture readonly %input, ptr %out
282277
; CHECK: [[VECTOR_BODY]]:
283278
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
284279
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
285-
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
280+
; CHECK-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
286281
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[INDEX]]
287282
; CHECK-NEXT: [[TMP2:%.*]] = sub <4 x i64> [[VEC_IND]], splat (i64 1)
288283
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr float, ptr [[INPUT]], <4 x i64> [[TMP2]]
289284
; CHECK-NEXT: store <4 x ptr> [[TMP3]], ptr [[TMP1]], align 8
290-
; CHECK-NEXT: [[TMP5:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
291285
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x ptr> [[TMP3]], i32 0
292-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[TMP5]], <4 x float> poison), !invariant.load [[META0]]
293-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
286+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP6]], i32 4, <4 x i1> [[TMP0]], <4 x float> poison), !invariant.load [[META0]]
287+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer
294288
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[INDEX]]
295289
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP8]], align 4
296290
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -343,7 +337,7 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input,
343337
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1
344338
; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2
345339
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3
346-
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
340+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
347341
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[PTRS]], i64 [[TMP0]]
348342
; CHECK-NEXT: [[TMP6:%.*]] = sub i64 [[TMP0]], 1
349343
; CHECK-NEXT: [[TMP7:%.*]] = sub i64 [[TMP1]], 1
@@ -358,9 +352,8 @@ define void @drop_nonvector_nuw_nsw_avx1(ptr noalias nocapture readonly %input,
358352
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x ptr> [[TMP15]], ptr [[TMP12]], i32 2
359353
; CHECK-NEXT: [[TMP17:%.*]] = insertelement <4 x ptr> [[TMP16]], ptr [[TMP13]], i32 3
360354
; CHECK-NEXT: store <4 x ptr> [[TMP17]], ptr [[TMP5]], align 8
361-
; CHECK-NEXT: [[TMP19:%.*]] = xor <4 x i1> [[TMP4]], splat (i1 true)
362-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP10]], i32 4, <4 x i1> [[TMP19]], <4 x float> poison), !invariant.load [[META0]]
363-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> zeroinitializer, <4 x float> [[WIDE_MASKED_LOAD]]
355+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0(ptr [[TMP10]], i32 4, <4 x i1> [[TMP4]], <4 x float> poison), !invariant.load [[META0]]
356+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP4]], <4 x float> [[WIDE_MASKED_LOAD]], <4 x float> zeroinitializer
364357
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[OUTPUT]], i64 [[TMP0]]
365358
; CHECK-NEXT: store <4 x float> [[PREDPHI]], ptr [[TMP21]], align 4
366359
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -720,8 +713,7 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
720713
; CHECK: [[VECTOR_BODY]]:
721714
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ]
722715
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
723-
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
724-
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
716+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
725717
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
726718
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
727719
; CHECK: [[PRED_LOAD_IF]]:
@@ -765,8 +757,8 @@ define void @pr70590_recipe_without_underlying_instr(i64 %n, ptr noalias %dst) {
765757
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP27]], i32 3
766758
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
767759
; CHECK: [[PRED_LOAD_CONTINUE6]]:
768-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ]
769-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]]
760+
; CHECK-NEXT: [[TMP31:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ]
761+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TMP31]], <4 x i8> zeroinitializer
770762
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
771763
; CHECK-NEXT: store <4 x i8> [[PREDPHI]], ptr [[TMP15]], align 4
772764
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
@@ -816,8 +808,7 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst
816808
; CHECK: [[VECTOR_BODY]]:
817809
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6:.*]] ]
818810
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_LOAD_CONTINUE6]] ]
819-
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
820-
; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> [[TMP0]], splat (i1 true)
811+
; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
821812
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
822813
; CHECK-NEXT: br i1 [[TMP2]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
823814
; CHECK: [[PRED_LOAD_IF]]:
@@ -861,9 +852,9 @@ define void @recipe_without_underlying_instr_lanes_used(i64 %n, ptr noalias %dst
861852
; CHECK-NEXT: [[TMP28:%.*]] = insertelement <4 x i8> [[TMP22]], i8 [[TMP27]], i32 3
862853
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE6]]
863854
; CHECK: [[PRED_LOAD_CONTINUE6]]:
864-
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ]
865-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP0]], <4 x i8> zeroinitializer, <4 x i8> [[WIDE_LOAD]]
866-
; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> zeroinitializer, <4 x i64> poison
855+
; CHECK-NEXT: [[TMP32:%.*]] = phi <4 x i8> [ [[TMP22]], %[[PRED_LOAD_CONTINUE4]] ], [ [[TMP28]], %[[PRED_LOAD_IF5]] ]
856+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[TMP1]], <4 x i8> [[TMP32]], <4 x i8> zeroinitializer
857+
; CHECK-NEXT: [[PREDPHI7:%.*]] = select <4 x i1> [[TMP1]], <4 x i64> poison, <4 x i64> zeroinitializer
867858
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x i64> [[PREDPHI7]], i32 3
868859
; CHECK-NEXT: store i64 [[TMP12]], ptr [[AUX]], align 8
869860
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]

llvm/test/Transforms/LoopVectorize/X86/pr51366-sunk-instruction-used-outside-of-loop.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,7 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) {
1717
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i64 [[TMP1]]
1818
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0
1919
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <2 x ptr> [[TMP16]], ptr [[TMP2]], i32 1
20-
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer
21-
; CHECK-NEXT: [[TMP4:%.*]] = xor <2 x i1> [[TMP3]], splat (i1 true)
20+
; CHECK-NEXT: [[TMP4:%.*]] = icmp ne <2 x i64> [[VEC_IND]], zeroinitializer
2221
; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
2322
; CHECK-NEXT: br i1 [[TMP5]], label %[[PRED_LOAD_IF:.*]], label %[[PRED_LOAD_CONTINUE:.*]]
2423
; CHECK: [[PRED_LOAD_IF]]:
@@ -34,8 +33,8 @@ define ptr @test(ptr noalias %src, ptr noalias %dst) {
3433
; CHECK-NEXT: [[TMP12:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP11]], i32 1
3534
; CHECK-NEXT: br label %[[PRED_LOAD_CONTINUE2]]
3635
; CHECK: [[PRED_LOAD_CONTINUE2]]:
37-
; CHECK-NEXT: [[TMP13:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
38-
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP3]], <2 x i32> zeroinitializer, <2 x i32> [[TMP13]]
36+
; CHECK-NEXT: [[TMP15:%.*]] = phi <2 x i32> [ [[TMP9]], %[[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], %[[PRED_LOAD_IF1]] ]
37+
; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP4]], <2 x i32> [[TMP15]], <2 x i32> zeroinitializer
3938
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[DST]], i64 [[TMP0]]
4039
; CHECK-NEXT: store <2 x i32> [[PREDPHI]], ptr [[TMP14]], align 4
4140
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2

0 commit comments

Comments
 (0)