Skip to content

Commit 37fe7a9

Browse files
authored
[LV] Generate scalar xor for VPInstruction::Not if possible. (#152628)
`VPInstruction::Not`, which generates an `xor` instruction, is widely used for exit conditions. This patch makes `VPInstruction::Not` generate a scalar `xor` when possible. This helps eliminate the `splat (i1 true)` operand of the `xor` and makes the `xor` itself scalar.
1 parent 96775e9 commit 37fe7a9

28 files changed

+182
-188
lines changed

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,6 +517,7 @@ bool VPInstruction::canGenerateScalarForFirstLane() const {
517517
case VPInstruction::PtrAdd:
518518
case VPInstruction::ExplicitVectorLength:
519519
case VPInstruction::AnyOf:
520+
case VPInstruction::Not:
520521
return true;
521522
default:
522523
return false;
@@ -569,7 +570,8 @@ Value *VPInstruction::generate(VPTransformState &State) {
569570

570571
switch (getOpcode()) {
571572
case VPInstruction::Not: {
572-
Value *A = State.get(getOperand(0));
573+
bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this);
574+
Value *A = State.get(getOperand(0), OnlyFirstLaneUsed);
573575
return Builder.CreateNot(A, Name);
574576
}
575577
case Instruction::ExtractElement: {
@@ -1120,6 +1122,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const {
11201122
case Instruction::Select:
11211123
case Instruction::Or:
11221124
case Instruction::Freeze:
1125+
case VPInstruction::Not:
11231126
// TODO: Cover additional opcodes.
11241127
return vputils::onlyFirstLaneUsed(this);
11251128
case VPInstruction::ActiveLaneMask:

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -721,9 +721,9 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
721721
; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP14]], ptr [[NEXT_GEP]], i32 8, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
722722
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
723723
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]])
724-
; PRED-NEXT: [[TMP16:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
725-
; PRED-NEXT: [[TMP17:%.*]] = extractelement <vscale x 2 x i1> [[TMP16]], i32 0
726-
; PRED-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
724+
; PRED-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
725+
; PRED-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true
726+
; PRED-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
727727
; PRED: [[MIDDLE_BLOCK]]:
728728
; PRED-NEXT: br label %[[EXIT:.*]]
729729
; PRED: [[SCALAR_PH]]:
@@ -1380,9 +1380,9 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
13801380
; PRED: [[PRED_STORE_CONTINUE27]]:
13811381
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
13821382
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i64(i64 [[INDEX]], i64 [[TMP17]])
1383-
; PRED-NEXT: [[TMP84:%.*]] = xor <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
1383+
; PRED-NEXT: [[TMP84:%.*]] = extractelement <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
1384+
; PRED-NEXT: [[TMP85:%.*]] = xor i1 [[TMP84]], true
13841385
; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
1385-
; PRED-NEXT: [[TMP85:%.*]] = extractelement <8 x i1> [[TMP84]], i32 0
13861386
; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
13871387
; PRED: [[MIDDLE_BLOCK]]:
13881388
; PRED-NEXT: br label %[[EXIT:.*]]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -143,10 +143,10 @@ define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i
143143
; CHECK-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> zeroinitializer, ptr [[TMP34]], i32 8, <vscale x 2 x i1> [[TMP23]])
144144
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
145145
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
146-
; CHECK-NEXT: [[TMP36:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
146+
; CHECK-NEXT: [[TMP35:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
147+
; CHECK-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true
147148
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[DOTSPLAT]]
148-
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <vscale x 2 x i1> [[TMP36]], i32 0
149-
; CHECK-NEXT: br i1 [[TMP37]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
149+
; CHECK-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
150150
; CHECK: [[MIDDLE_BLOCK]]:
151151
; CHECK-NEXT: br label %[[EXIT:.*]]
152152
; CHECK: [[SCALAR_PH]]:
@@ -262,10 +262,10 @@ define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
262262
; CHECK-NEXT: call void @llvm.masked.store.nxv2i64.p0(<vscale x 2 x i64> [[TMP23]], ptr [[TMP38]], i32 4, <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
263263
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]]
264264
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP14]])
265-
; CHECK-NEXT: [[TMP47:%.*]] = xor <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
265+
; CHECK-NEXT: [[TMP39:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
266+
; CHECK-NEXT: [[TMP40:%.*]] = xor i1 [[TMP39]], true
266267
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT4]]
267-
; CHECK-NEXT: [[TMP48:%.*]] = extractelement <vscale x 2 x i1> [[TMP47]], i32 0
268-
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
268+
; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
269269
; CHECK: [[MIDDLE_BLOCK]]:
270270
; CHECK-NEXT: br label %[[EXIT:.*]]
271271
; CHECK: [[SCALAR_PH]]:

llvm/test/Transforms/LoopVectorize/AArch64/first-order-recurrence-fold-tail.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ define i32 @test_phi_iterator_invalidation(ptr %A, ptr noalias %B) {
6464
; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> [[TMP26]], ptr [[TMP28]], i32 4, <4 x i1> [[ACTIVE_LANE_MASK]])
6565
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
6666
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX_NEXT]], i64 1002)
67-
; CHECK-NEXT: [[TMP30:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
67+
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
68+
; CHECK-NEXT: [[TMP30:%.*]] = xor i1 [[TMP29]], true
6869
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
69-
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x i1> [[TMP30]], i32 0
70-
; CHECK-NEXT: br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
70+
; CHECK-NEXT: br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
7171
; CHECK: middle.block:
7272
; CHECK-NEXT: br label [[EXIT:%.*]]
7373
; CHECK: scalar.ph:

llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,7 @@ define float @fmaxnum(ptr %src, i64 %n) {
7575
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]])
7676
; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]])
7777
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
78-
; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
79-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP15]], i32 0
78+
; CHECK-NEXT: [[TMP16:%.*]] = xor i1 [[TMP6]], true
8079
; CHECK-NEXT: [[TMP17:%.*]] = and i1 [[CMP_N]], [[TMP16]]
8180
; CHECK-NEXT: br i1 [[TMP17]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
8281
; CHECK: [[SCALAR_PH]]:

llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,7 @@ define float @fminnum(ptr %src, i64 %n) {
7575
; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]])
7676
; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[RDX_MINMAX_SELECT]])
7777
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
78-
; CHECK-NEXT: [[TMP15:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true)
79-
; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP15]], i32 0
78+
; CHECK-NEXT: [[TMP16:%.*]] = xor i1 [[TMP6]], true
8079
; CHECK-NEXT: [[TMP17:%.*]] = and i1 [[CMP_N]], [[TMP16]]
8180
; CHECK-NEXT: br i1 [[TMP17]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
8281
; CHECK: [[SCALAR_PH]]:

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -126,9 +126,9 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
126126
; PRED-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP23]], ptr [[TMP26]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
127127
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP5]]
128128
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX]], i64 [[TMP15]])
129-
; PRED-NEXT: [[TMP28:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
130-
; PRED-NEXT: [[TMP29:%.*]] = extractelement <vscale x 16 x i1> [[TMP28]], i32 0
131-
; PRED-NEXT: br i1 [[TMP29]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
129+
; PRED-NEXT: [[TMP25:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
130+
; PRED-NEXT: [[TMP27:%.*]] = xor i1 [[TMP25]], true
131+
; PRED-NEXT: br i1 [[TMP27]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
132132
; PRED: [[MIDDLE_BLOCK]]:
133133
; PRED-NEXT: br label %[[EXIT:.*]]
134134
; PRED: [[SCALAR_PH]]:
@@ -293,9 +293,9 @@ define void @iv_trunc(i32 %x, ptr %dst, i64 %N) #0 {
293293
; PRED: [[PRED_STORE_CONTINUE2]]:
294294
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
295295
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP15]])
296-
; PRED-NEXT: [[TMP24:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
296+
; PRED-NEXT: [[TMP24:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
297+
; PRED-NEXT: [[TMP25:%.*]] = xor i1 [[TMP24]], true
297298
; PRED-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
298-
; PRED-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[TMP24]], i32 0
299299
; PRED-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
300300
; PRED: [[MIDDLE_BLOCK]]:
301301
; PRED-NEXT: br label %[[EXIT:.*]]
@@ -482,9 +482,9 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
482482
; PRED: [[PRED_STORE_CONTINUE7]]:
483483
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
484484
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP16]])
485-
; PRED-NEXT: [[TMP35:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
485+
; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
486+
; PRED-NEXT: [[TMP36:%.*]] = xor i1 [[TMP35]], true
486487
; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
487-
; PRED-NEXT: [[TMP36:%.*]] = extractelement <4 x i1> [[TMP35]], i32 0
488488
; PRED-NEXT: br i1 [[TMP36]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
489489
; PRED: [[MIDDLE_BLOCK]]:
490490
; PRED-NEXT: br label %[[EXIT:.*]]
@@ -673,9 +673,9 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
673673
; PRED: [[PRED_STORE_CONTINUE6]]:
674674
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
675675
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i64(i64 [[INDEX]], i64 [[TMP15]])
676-
; PRED-NEXT: [[TMP34:%.*]] = xor <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
676+
; PRED-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
677+
; PRED-NEXT: [[TMP35:%.*]] = xor i1 [[TMP34]], true
677678
; PRED-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
678-
; PRED-NEXT: [[TMP35:%.*]] = extractelement <4 x i1> [[TMP34]], i32 0
679679
; PRED-NEXT: br i1 [[TMP35]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
680680
; PRED: [[MIDDLE_BLOCK]]:
681681
; PRED-NEXT: br label %[[EXIT:.*]]

llvm/test/Transforms/LoopVectorize/AArch64/low_trip_count_predicates.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -422,9 +422,9 @@ define void @overflow_indvar_known_false(ptr nocapture noundef %p, i32 noundef %
422422
; CHECK-NEXT: call void @llvm.masked.store.nxv16i8.p0(<vscale x 16 x i8> [[TMP15]], ptr [[TMP13]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]])
423423
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP3]]
424424
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i64(i64 [[INDEX_NEXT]], i64 [[TMP1]])
425-
; CHECK-NEXT: [[TMP16:%.*]] = xor <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
426-
; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 16 x i1> [[TMP16]], i32 0
427-
; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
425+
; CHECK-NEXT: [[TMP30:%.*]] = extractelement <vscale x 16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
426+
; CHECK-NEXT: [[TMP31:%.*]] = xor i1 [[TMP30]], true
427+
; CHECK-NEXT: br i1 [[TMP31]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
428428
; CHECK: [[MIDDLE_BLOCK]]:
429429
; CHECK-NEXT: br label %[[WHILE_END_LOOPEXIT:.*]]
430430
; CHECK: [[SCALAR_PH]]:

llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,8 +87,8 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
8787
; TFCOMMON: pred.store.continue2:
8888
; TFCOMMON-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
8989
; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]])
90-
; TFCOMMON-NEXT: [[TMP18:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
91-
; TFCOMMON-NEXT: [[TMP17:%.*]] = extractelement <2 x i1> [[TMP18]], i32 0
90+
; TFCOMMON-NEXT: [[TMP15:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
91+
; TFCOMMON-NEXT: [[TMP17:%.*]] = xor i1 [[TMP15]], true
9292
; TFCOMMON-NEXT: br i1 [[TMP17]], label [[END:%.*]], label [[LOOP]], !llvm.loop [[LOOP0:![0-9]+]]
9393
; TFCOMMON: end:
9494
; TFCOMMON-NEXT: ret void
@@ -153,9 +153,9 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 {
153153
; TFA_INTERLEAVE-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 2
154154
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[INDEX]], i64 [[TMP3]])
155155
; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK_NEXT10]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i64(i64 [[TMP27]], i64 [[TMP3]])
156-
; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
157-
; TFA_INTERLEAVE-NEXT: [[TMP30:%.*]] = extractelement <2 x i1> [[TMP28]], i32 0
158-
; TFA_INTERLEAVE-NEXT: br i1 [[TMP30]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
156+
; TFA_INTERLEAVE-NEXT: [[TMP26:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
157+
; TFA_INTERLEAVE-NEXT: [[TMP28:%.*]] = xor i1 [[TMP26]], true
158+
; TFA_INTERLEAVE-NEXT: br i1 [[TMP28]], label [[END:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
159159
; TFA_INTERLEAVE: end:
160160
; TFA_INTERLEAVE-NEXT: ret void
161161
;

0 commit comments

Comments
 (0)