Skip to content

Commit dd42541

Browse files
committed
[VPlan] Narrow VPWidenCastRecipe to scalar cast recipe.
1 parent 1200453 commit dd42541

File tree

7 files changed

+154
-86
lines changed

7 files changed

+154
-86
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1386,7 +1386,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
13861386
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
13871387
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
13881388
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
1389-
if (!isa<VPWidenRecipe, VPWidenSelectRecipe, VPReplicateRecipe>(&R))
1389+
if (!isa<VPWidenRecipe, VPWidenCastRecipe, VPWidenSelectRecipe,
1390+
VPReplicateRecipe>(&R))
13901391
continue;
13911392
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
13921393
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
@@ -1422,6 +1423,15 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
14221423
}))
14231424
continue;
14241425

1426+
if (auto *CastR = dyn_cast<VPWidenCastRecipe>(RepOrWidenR)) {
1427+
VPBuilder Builder(CastR);
1428+
auto *Clone = Builder.createScalarCast(
1429+
CastR->getOpcode(), CastR->getOperand(0), CastR->getResultType(),
1430+
CastR->getDebugLoc());
1431+
CastR->replaceAllUsesWith(Clone);
1432+
CastR->eraseFromParent();
1433+
continue;
1434+
}
14251435
auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
14261436
RepOrWidenR->operands(),
14271437
true /*IsSingleScalar*/);

llvm/test/Transforms/LoopVectorize/AArch64/partial-reduce-constant-ops.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -492,18 +492,18 @@ define i64 @partial_reduction_mul_two_users(i64 %n, ptr %a, i16 %b, i32 %c) {
492492
; CHECK-NEXT: [[TMP3:%.*]] = zext <8 x i32> [[TMP2]] to <8 x i64>
493493
; CHECK-NEXT: [[PARTIAL_REDUCE]] = call <4 x i64> @llvm.vector.partial.reduce.add.v4i64.v8i64(<4 x i64> [[VEC_PHI]], <8 x i64> [[TMP3]])
494494
; CHECK-NEXT: [[TMP5:%.*]] = sext <8 x i16> [[BROADCAST_SPLAT2]] to <8 x i32>
495-
; CHECK-NEXT: [[TMP6:%.*]] = sext <8 x i32> [[TMP5]] to <8 x i64>
495+
; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i32> [[TMP5]], i32 0
496+
; CHECK-NEXT: [[TMP9:%.*]] = sext i32 [[TMP6]] to i64
496497
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
497498
; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
498-
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
499+
; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
499500
; CHECK: [[MIDDLE_BLOCK]]:
500501
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[PARTIAL_REDUCE]])
501-
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <8 x i64> [[TMP6]], i32 7
502502
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
503503
; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
504504
; CHECK: [[SCALAR_PH]]:
505505
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
506-
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
506+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[TMP9]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
507507
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP8]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
508508
; CHECK-NEXT: br label %[[LOOP:.*]]
509509
; CHECK: [[LOOP]]:
@@ -520,7 +520,7 @@ define i64 @partial_reduction_mul_two_users(i64 %n, ptr %a, i16 %b, i32 %c) {
520520
; CHECK-NEXT: [[LOAD_EXT:%.*]] = sext i16 [[LOAD]] to i32
521521
; CHECK-NEXT: [[LOAD_EXT_EXT]] = sext i32 [[LOAD_EXT]] to i64
522522
; CHECK-NEXT: [[EXITCOND740_NOT:%.*]] = icmp eq i64 [[IV]], [[N]]
523-
; CHECK-NEXT: br i1 [[EXITCOND740_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP19:![0-9]+]]
523+
; CHECK-NEXT: br i1 [[EXITCOND740_NOT]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP17:![0-9]+]]
524524
; CHECK: [[EXIT]]:
525525
; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], %[[LOOP]] ], [ [[TMP8]], %[[MIDDLE_BLOCK]] ]
526526
; CHECK-NEXT: ret i64 [[ADD_LCSSA]]

llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll

Lines changed: 119 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -66,57 +66,150 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8
6666
; CHECK-NEXT: [[CONFLICT_RDX15:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT14]]
6767
; CHECK-NEXT: br i1 [[CONFLICT_RDX15]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
6868
; CHECK: [[VECTOR_PH]]:
69-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 0, i32 [[TMP2]])
69+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 [[TMP2]])
7070
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
7171
; CHECK: [[VECTOR_BODY]]:
72-
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE22:.*]] ]
73-
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE22]] ]
74-
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i8> [ <i8 0, i8 4, i8 8, i8 12>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE22]] ]
75-
; CHECK-NEXT: [[TMP28:%.*]] = load i8, ptr [[SRC]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
76-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[TMP28]], i64 0
77-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer
78-
; CHECK-NEXT: [[TMP25:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i64>
79-
; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i64> [[TMP25]], i32 0
72+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE46:.*]] ]
73+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <16 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[PRED_STORE_CONTINUE46]] ]
74+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i8> [ <i8 0, i8 4, i8 8, i8 12, i8 16, i8 20, i8 24, i8 28, i8 32, i8 36, i8 40, i8 44, i8 48, i8 52, i8 56, i8 60>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE46]] ]
75+
; CHECK-NEXT: [[TMP25:%.*]] = load i8, ptr [[SRC]], align 1, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]]
76+
; CHECK-NEXT: [[TMP29:%.*]] = zext i8 [[TMP25]] to i64
8077
; CHECK-NEXT: [[TMP30:%.*]] = or i64 [[TMP29]], 1
81-
; CHECK-NEXT: [[TMP26:%.*]] = zext <4 x i8> [[VEC_IND]] to <4 x i64>
82-
; CHECK-NEXT: [[TMP27:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 0
83-
; CHECK-NEXT: br i1 [[TMP27]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
78+
; CHECK-NEXT: [[TMP27:%.*]] = zext <16 x i8> [[VEC_IND]] to <16 x i64>
79+
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 0
80+
; CHECK-NEXT: br i1 [[TMP28]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
8481
; CHECK: [[PRED_STORE_IF]]:
85-
; CHECK-NEXT: [[TMP102:%.*]] = extractelement <4 x i64> [[TMP26]], i32 0
82+
; CHECK-NEXT: [[TMP102:%.*]] = extractelement <16 x i64> [[TMP27]], i32 0
8683
; CHECK-NEXT: [[TMP103:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP102]], i64 [[OFF]]
8784
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP103]], align 8, !alias.scope [[META3]]
8885
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE]]
8986
; CHECK: [[PRED_STORE_CONTINUE]]:
90-
; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 1
91-
; CHECK-NEXT: br i1 [[TMP32]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
87+
; CHECK-NEXT: [[TMP31:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 1
88+
; CHECK-NEXT: br i1 [[TMP31]], label %[[PRED_STORE_IF17:.*]], label %[[PRED_STORE_CONTINUE18:.*]]
9289
; CHECK: [[PRED_STORE_IF17]]:
93-
; CHECK-NEXT: [[TMP108:%.*]] = extractelement <4 x i64> [[TMP26]], i32 1
90+
; CHECK-NEXT: [[TMP108:%.*]] = extractelement <16 x i64> [[TMP27]], i32 1
9491
; CHECK-NEXT: [[TMP109:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP108]], i64 [[OFF]]
9592
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP109]], align 8, !alias.scope [[META3]]
9693
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE18]]
9794
; CHECK: [[PRED_STORE_CONTINUE18]]:
98-
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 2
99-
; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
95+
; CHECK-NEXT: [[TMP34:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 2
96+
; CHECK-NEXT: br i1 [[TMP34]], label %[[PRED_STORE_IF19:.*]], label %[[PRED_STORE_CONTINUE20:.*]]
10097
; CHECK: [[PRED_STORE_IF19]]:
101-
; CHECK-NEXT: [[TMP114:%.*]] = extractelement <4 x i64> [[TMP26]], i32 2
98+
; CHECK-NEXT: [[TMP114:%.*]] = extractelement <16 x i64> [[TMP27]], i32 2
10299
; CHECK-NEXT: [[TMP115:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP114]], i64 [[OFF]]
103100
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP115]], align 8, !alias.scope [[META3]]
104101
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE20]]
105102
; CHECK: [[PRED_STORE_CONTINUE20]]:
106-
; CHECK-NEXT: [[TMP42:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK]], i32 3
107-
; CHECK-NEXT: br i1 [[TMP42]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22]]
103+
; CHECK-NEXT: [[TMP37:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 3
104+
; CHECK-NEXT: br i1 [[TMP37]], label %[[PRED_STORE_IF21:.*]], label %[[PRED_STORE_CONTINUE22:.*]]
108105
; CHECK: [[PRED_STORE_IF21]]:
109-
; CHECK-NEXT: [[TMP120:%.*]] = extractelement <4 x i64> [[TMP26]], i32 3
106+
; CHECK-NEXT: [[TMP120:%.*]] = extractelement <16 x i64> [[TMP27]], i32 3
110107
; CHECK-NEXT: [[TMP121:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP120]], i64 [[OFF]]
111108
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP121]], align 8, !alias.scope [[META3]]
112109
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE22]]
113110
; CHECK: [[PRED_STORE_CONTINUE22]]:
111+
; CHECK-NEXT: [[TMP40:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 4
112+
; CHECK-NEXT: br i1 [[TMP40]], label %[[PRED_STORE_IF23:.*]], label %[[PRED_STORE_CONTINUE24:.*]]
113+
; CHECK: [[PRED_STORE_IF23]]:
114+
; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i64> [[TMP27]], i32 4
115+
; CHECK-NEXT: [[TMP42:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP41]], i64 [[OFF]]
116+
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP42]], align 8, !alias.scope [[META3]]
117+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE24]]
118+
; CHECK: [[PRED_STORE_CONTINUE24]]:
119+
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 5
120+
; CHECK-NEXT: br i1 [[TMP43]], label %[[PRED_STORE_IF25:.*]], label %[[PRED_STORE_CONTINUE26:.*]]
121+
; CHECK: [[PRED_STORE_IF25]]:
122+
; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i64> [[TMP27]], i32 5
123+
; CHECK-NEXT: [[TMP45:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP44]], i64 [[OFF]]
124+
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP45]], align 8, !alias.scope [[META3]]
125+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE26]]
126+
; CHECK: [[PRED_STORE_CONTINUE26]]:
127+
; CHECK-NEXT: [[TMP46:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 6
128+
; CHECK-NEXT: br i1 [[TMP46]], label %[[PRED_STORE_IF27:.*]], label %[[PRED_STORE_CONTINUE28:.*]]
129+
; CHECK: [[PRED_STORE_IF27]]:
130+
; CHECK-NEXT: [[TMP76:%.*]] = extractelement <16 x i64> [[TMP27]], i32 6
131+
; CHECK-NEXT: [[TMP77:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP76]], i64 [[OFF]]
132+
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP77]], align 8, !alias.scope [[META3]]
133+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE28]]
134+
; CHECK: [[PRED_STORE_CONTINUE28]]:
135+
; CHECK-NEXT: [[TMP49:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 7
136+
; CHECK-NEXT: br i1 [[TMP49]], label %[[PRED_STORE_IF29:.*]], label %[[PRED_STORE_CONTINUE30:.*]]
137+
; CHECK: [[PRED_STORE_IF29]]:
138+
; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i64> [[TMP27]], i32 7
139+
; CHECK-NEXT: [[TMP51:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP50]], i64 [[OFF]]
140+
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP51]], align 8, !alias.scope [[META3]]
141+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE30]]
142+
; CHECK: [[PRED_STORE_CONTINUE30]]:
143+
; CHECK-NEXT: [[TMP52:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 8
144+
; CHECK-NEXT: br i1 [[TMP52]], label %[[PRED_STORE_IF31:.*]], label %[[PRED_STORE_CONTINUE32:.*]]
145+
; CHECK: [[PRED_STORE_IF31]]:
146+
; CHECK-NEXT: [[TMP53:%.*]] = extractelement <16 x i64> [[TMP27]], i32 8
147+
; CHECK-NEXT: [[TMP54:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP53]], i64 [[OFF]]
148+
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP54]], align 8, !alias.scope [[META3]]
149+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE32]]
150+
; CHECK: [[PRED_STORE_CONTINUE32]]:
151+
; CHECK-NEXT: [[TMP55:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 9
152+
; CHECK-NEXT: br i1 [[TMP55]], label %[[PRED_STORE_IF33:.*]], label %[[PRED_STORE_CONTINUE34:.*]]
153+
; CHECK: [[PRED_STORE_IF33]]:
154+
; CHECK-NEXT: [[TMP56:%.*]] = extractelement <16 x i64> [[TMP27]], i32 9
155+
; CHECK-NEXT: [[TMP57:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP56]], i64 [[OFF]]
156+
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP57]], align 8, !alias.scope [[META3]]
157+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE34]]
158+
; CHECK: [[PRED_STORE_CONTINUE34]]:
159+
; CHECK-NEXT: [[TMP58:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 10
160+
; CHECK-NEXT: br i1 [[TMP58]], label %[[PRED_STORE_IF35:.*]], label %[[PRED_STORE_CONTINUE36:.*]]
161+
; CHECK: [[PRED_STORE_IF35]]:
162+
; CHECK-NEXT: [[TMP59:%.*]] = extractelement <16 x i64> [[TMP27]], i32 10
163+
; CHECK-NEXT: [[TMP60:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP59]], i64 [[OFF]]
164+
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP60]], align 8, !alias.scope [[META3]]
165+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE36]]
166+
; CHECK: [[PRED_STORE_CONTINUE36]]:
167+
; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 11
168+
; CHECK-NEXT: br i1 [[TMP61]], label %[[PRED_STORE_IF37:.*]], label %[[PRED_STORE_CONTINUE38:.*]]
169+
; CHECK: [[PRED_STORE_IF37]]:
170+
; CHECK-NEXT: [[TMP62:%.*]] = extractelement <16 x i64> [[TMP27]], i32 11
171+
; CHECK-NEXT: [[TMP63:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP62]], i64 [[OFF]]
172+
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP63]], align 8, !alias.scope [[META3]]
173+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE38]]
174+
; CHECK: [[PRED_STORE_CONTINUE38]]:
175+
; CHECK-NEXT: [[TMP64:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 12
176+
; CHECK-NEXT: br i1 [[TMP64]], label %[[PRED_STORE_IF39:.*]], label %[[PRED_STORE_CONTINUE40:.*]]
177+
; CHECK: [[PRED_STORE_IF39]]:
178+
; CHECK-NEXT: [[TMP65:%.*]] = extractelement <16 x i64> [[TMP27]], i32 12
179+
; CHECK-NEXT: [[TMP66:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP65]], i64 [[OFF]]
180+
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP66]], align 8, !alias.scope [[META3]]
181+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE40]]
182+
; CHECK: [[PRED_STORE_CONTINUE40]]:
183+
; CHECK-NEXT: [[TMP67:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 13
184+
; CHECK-NEXT: br i1 [[TMP67]], label %[[PRED_STORE_IF41:.*]], label %[[PRED_STORE_CONTINUE42:.*]]
185+
; CHECK: [[PRED_STORE_IF41]]:
186+
; CHECK-NEXT: [[TMP68:%.*]] = extractelement <16 x i64> [[TMP27]], i32 13
187+
; CHECK-NEXT: [[TMP69:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP68]], i64 [[OFF]]
188+
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP69]], align 8, !alias.scope [[META3]]
189+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE42]]
190+
; CHECK: [[PRED_STORE_CONTINUE42]]:
191+
; CHECK-NEXT: [[TMP70:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 14
192+
; CHECK-NEXT: br i1 [[TMP70]], label %[[PRED_STORE_IF43:.*]], label %[[PRED_STORE_CONTINUE44:.*]]
193+
; CHECK: [[PRED_STORE_IF43]]:
194+
; CHECK-NEXT: [[TMP71:%.*]] = extractelement <16 x i64> [[TMP27]], i32 14
195+
; CHECK-NEXT: [[TMP72:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP71]], i64 [[OFF]]
196+
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP72]], align 8, !alias.scope [[META3]]
197+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE44]]
198+
; CHECK: [[PRED_STORE_CONTINUE44]]:
199+
; CHECK-NEXT: [[TMP73:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK]], i32 15
200+
; CHECK-NEXT: br i1 [[TMP73]], label %[[PRED_STORE_IF45:.*]], label %[[PRED_STORE_CONTINUE46]]
201+
; CHECK: [[PRED_STORE_IF45]]:
202+
; CHECK-NEXT: [[TMP74:%.*]] = extractelement <16 x i64> [[TMP27]], i32 15
203+
; CHECK-NEXT: [[TMP75:%.*]] = getelementptr [16 x i64], ptr [[DST_1]], i64 [[TMP74]], i64 [[OFF]]
204+
; CHECK-NEXT: store i64 [[TMP30]], ptr [[TMP75]], align 8, !alias.scope [[META3]]
205+
; CHECK-NEXT: br label %[[PRED_STORE_CONTINUE46]]
206+
; CHECK: [[PRED_STORE_CONTINUE46]]:
114207
; CHECK-NEXT: store i8 0, ptr [[DST_2]], align 1, !alias.scope [[META5:![0-9]+]], !noalias [[META7:![0-9]+]]
115-
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
116-
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i32(i32 [[INDEX_NEXT]], i32 [[TMP2]])
117-
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
208+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16
209+
; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 [[INDEX_NEXT]], i32 [[TMP2]])
210+
; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
118211
; CHECK-NEXT: [[TMP48:%.*]] = xor i1 [[TMP47]], true
119-
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 16)
212+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <16 x i8> [[VEC_IND]], splat (i8 64)
120213
; CHECK-NEXT: br i1 [[TMP48]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
121214
; CHECK: [[MIDDLE_BLOCK]]:
122215
; CHECK-NEXT: br label %[[EXIT:.*]]

0 commit comments

Comments
 (0)