@@ -9,14 +9,14 @@ define void @vscale_mul_4(ptr noalias noundef readonly captures(none) %a, ptr no
99; CHECK-NEXT: [[ENTRY:.*]]:
1010; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
1111; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
12- ; CHECK-NEXT: [[TMP4 :%.*]] = call i64 @llvm.vscale.i64()
13- ; CHECK-NEXT: [[TMP5 :%.*]] = mul nuw i64 [[TMP4 ]], 4
14- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5 ]]
12+ ; CHECK-NEXT: [[TMP10 :%.*]] = call i64 @llvm.vscale.i64()
13+ ; CHECK-NEXT: [[TMP3 :%.*]] = mul nuw i64 [[TMP10 ]], 4
14+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP3 ]]
1515; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
1616; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[A]], align 4
1717; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[B]], align 4
18- ; CHECK-NEXT: [[TMP10 :%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
19- ; CHECK-NEXT: store <vscale x 4 x float> [[TMP10 ]], ptr [[B]], align 4
18+ ; CHECK-NEXT: [[TMP4 :%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
19+ ; CHECK-NEXT: store <vscale x 4 x float> [[TMP4 ]], ptr [[B]], align 4
2020; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
2121; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
2222; CHECK: [[FOR_COND_CLEANUP]]:
@@ -124,36 +124,29 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n
124124; CHECK-NEXT: [[ENTRY:.*]]:
125125; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
126126; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 12
127- ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 2
128- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
129- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
130- ; CHECK: [[VECTOR_PH]]:
131127; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
132128; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
133129; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
134130; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
135131; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
136132; CHECK: [[VECTOR_BODY]]:
137- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH ]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
133+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY ]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
138134; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
139- ; CHECK-NEXT: [[WIDE_LOAD :%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
140- ; CHECK-NEXT: [[TMP9 :%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
141- ; CHECK-NEXT: [[WIDE_LOAD1 :%.*]] = load <vscale x 4 x float>, ptr [[TMP9 ]], align 4
142- ; CHECK-NEXT: [[TMP11 :%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD ]], [[WIDE_LOAD1 ]]
143- ; CHECK-NEXT: store <vscale x 4 x float> [[TMP11 ]], ptr [[TMP9 ]], align 4
135+ ; CHECK-NEXT: [[WIDE_LOAD2 :%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
136+ ; CHECK-NEXT: [[TMP12 :%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
137+ ; CHECK-NEXT: [[WIDE_LOAD4 :%.*]] = load <vscale x 4 x float>, ptr [[TMP12 ]], align 4
138+ ; CHECK-NEXT: [[TMP25 :%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD2 ]], [[WIDE_LOAD4 ]]
139+ ; CHECK-NEXT: store <vscale x 4 x float> [[TMP25 ]], ptr [[TMP12 ]], align 4
144140; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
145- ; CHECK-NEXT: [[TMP12 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
146- ; CHECK-NEXT: br i1 [[TMP12 ]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
141+ ; CHECK-NEXT: [[TMP22 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
142+ ; CHECK-NEXT: br i1 [[TMP22 ]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
147143; CHECK: [[MIDDLE_BLOCK]]:
148144; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
149- ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
150- ; CHECK: [[SCALAR_PH]]:
151- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
152- ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
145+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
153146; CHECK: [[FOR_COND_CLEANUP]]:
154147; CHECK-NEXT: ret void
155148; CHECK: [[FOR_BODY]]:
156- ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ], [ [[INDVARS_IV_NEXT:%.* ]], %[[FOR_BODY ]] ]
149+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.* ]], %[[FOR_BODY ]] ], [ [[N_VEC ]], %[[MIDDLE_BLOCK ]] ]
157150; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
158151; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4
159152; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
@@ -191,17 +184,13 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
191184; CHECK-NEXT: [[ENTRY:.*]]:
192185; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
193186; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 31
194- ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 3
195- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
196- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
197- ; CHECK: [[VECTOR_PH]]:
198187; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
199188; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
200189; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
201190; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
202191; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
203192; CHECK: [[VECTOR_BODY]]:
204- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH ]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
193+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY ]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
205194; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
206195; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
207196; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
@@ -226,14 +215,11 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
226215; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
227216; CHECK: [[MIDDLE_BLOCK]]:
228217; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
229- ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
230- ; CHECK: [[SCALAR_PH]]:
231- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
232- ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
218+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
233219; CHECK: [[FOR_COND_CLEANUP]]:
234220; CHECK-NEXT: ret void
235221; CHECK: [[FOR_BODY]]:
236- ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ], [ [[INDVARS_IV_NEXT:%.* ]], %[[FOR_BODY ]] ]
222+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.* ]], %[[FOR_BODY ]] ], [ [[N_VEC ]], %[[MIDDLE_BLOCK ]] ]
237223; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
238224; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4
239225; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
@@ -271,17 +257,13 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
271257; CHECK-NEXT: [[ENTRY:.*]]:
272258; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
273259; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 64
274- ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 3
275- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
276- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
277- ; CHECK: [[VECTOR_PH]]:
278260; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
279261; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
280262; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
281263; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
282264; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
283265; CHECK: [[VECTOR_BODY]]:
284- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH ]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
266+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY ]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
285267; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
286268; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
287269; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
@@ -306,14 +288,11 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
306288; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
307289; CHECK: [[MIDDLE_BLOCK]]:
308290; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
309- ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
310- ; CHECK: [[SCALAR_PH]]:
311- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
312- ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
291+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
313292; CHECK: [[FOR_COND_CLEANUP]]:
314293; CHECK-NEXT: ret void
315294; CHECK: [[FOR_BODY]]:
316- ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ], [ [[INDVARS_IV_NEXT:%.* ]], %[[FOR_BODY ]] ]
295+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.* ]], %[[FOR_BODY ]] ], [ [[N_VEC ]], %[[MIDDLE_BLOCK ]] ]
317296; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
318297; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4
319298; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
0 commit comments