@@ -9,14 +9,14 @@ define void @vscale_mul_4(ptr noalias noundef readonly captures(none) %a, ptr no
99; CHECK-NEXT: [[ENTRY:.*]]:
1010; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
1111; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
12- ; CHECK-NEXT: [[TMP4 :%.*]] = call i64 @llvm.vscale.i64()
13- ; CHECK-NEXT: [[TMP5 :%.*]] = mul nuw i64 [[TMP4 ]], 4
14- ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP5 ]]
12+ ; CHECK-NEXT: [[TMP10 :%.*]] = call i64 @llvm.vscale.i64()
13+ ; CHECK-NEXT: [[TMP3 :%.*]] = mul nuw i64 [[TMP10 ]], 4
14+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], [[TMP3 ]]
1515; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
1616; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, ptr [[A]], align 4
1717; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 4 x float>, ptr [[B]], align 4
18- ; CHECK-NEXT: [[TMP10 :%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
19- ; CHECK-NEXT: store <vscale x 4 x float> [[TMP10 ]], ptr [[B]], align 4
18+ ; CHECK-NEXT: [[TMP4 :%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
19+ ; CHECK-NEXT: store <vscale x 4 x float> [[TMP4 ]], ptr [[B]], align 4
2020; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP1]], [[N_VEC]]
2121; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
2222; CHECK: [[FOR_COND_CLEANUP]]:
@@ -121,36 +121,29 @@ define void @vscale_mul_12(ptr noalias noundef readonly captures(none) %a, ptr n
121121; CHECK-NEXT: [[ENTRY:.*]]:
122122; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
123123; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 12
124- ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 2
125- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
126- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
127- ; CHECK: [[VECTOR_PH]]:
128124; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
129125; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
130126; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
131127; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
132128; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
133129; CHECK: [[VECTOR_BODY]]:
134- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH ]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
130+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY ]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
135131; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
136- ; CHECK-NEXT: [[WIDE_LOAD :%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
137- ; CHECK-NEXT: [[TMP9 :%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
138- ; CHECK-NEXT: [[WIDE_LOAD1 :%.*]] = load <vscale x 4 x float>, ptr [[TMP9 ]], align 4
139- ; CHECK-NEXT: [[TMP11 :%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD ]], [[WIDE_LOAD1 ]]
140- ; CHECK-NEXT: store <vscale x 4 x float> [[TMP11 ]], ptr [[TMP9 ]], align 4
132+ ; CHECK-NEXT: [[WIDE_LOAD2 :%.*]] = load <vscale x 4 x float>, ptr [[TMP7]], align 4
133+ ; CHECK-NEXT: [[TMP12 :%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDEX]]
134+ ; CHECK-NEXT: [[WIDE_LOAD4 :%.*]] = load <vscale x 4 x float>, ptr [[TMP12 ]], align 4
135+ ; CHECK-NEXT: [[TMP25 :%.*]] = fmul <vscale x 4 x float> [[WIDE_LOAD2 ]], [[WIDE_LOAD4 ]]
136+ ; CHECK-NEXT: store <vscale x 4 x float> [[TMP25 ]], ptr [[TMP12 ]], align 4
141137; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
142- ; CHECK-NEXT: [[TMP12 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
143- ; CHECK-NEXT: br i1 [[TMP12 ]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
138+ ; CHECK-NEXT: [[TMP22 :%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
139+ ; CHECK-NEXT: br i1 [[TMP22 ]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
144140; CHECK: [[MIDDLE_BLOCK]]:
145141; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
146- ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
147- ; CHECK: [[SCALAR_PH]]:
148- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
149- ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
142+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
150143; CHECK: [[FOR_COND_CLEANUP]]:
151144; CHECK-NEXT: ret void
152145; CHECK: [[FOR_BODY]]:
153- ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ], [ [[INDVARS_IV_NEXT:%.* ]], %[[FOR_BODY ]] ]
146+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.* ]], %[[FOR_BODY ]] ], [ [[N_VEC ]], %[[MIDDLE_BLOCK ]] ]
154147; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
155148; CHECK-NEXT: [[TMP13:%.*]] = load float, ptr [[ARRAYIDX]], align 4
156149; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
@@ -188,17 +181,13 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
188181; CHECK-NEXT: [[ENTRY:.*]]:
189182; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
190183; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 31
191- ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 3
192- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
193- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
194- ; CHECK: [[VECTOR_PH]]:
195184; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
196185; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
197186; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
198187; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
199188; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
200189; CHECK: [[VECTOR_BODY]]:
201- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH ]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
190+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY ]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
202191; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
203192; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
204193; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
@@ -220,14 +209,11 @@ define void @vscale_mul_31(ptr noalias noundef readonly captures(none) %a, ptr n
220209; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
221210; CHECK: [[MIDDLE_BLOCK]]:
222211; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
223- ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
224- ; CHECK: [[SCALAR_PH]]:
225- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
226- ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
212+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
227213; CHECK: [[FOR_COND_CLEANUP]]:
228214; CHECK-NEXT: ret void
229215; CHECK: [[FOR_BODY]]:
230- ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ], [ [[INDVARS_IV_NEXT:%.* ]], %[[FOR_BODY ]] ]
216+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.* ]], %[[FOR_BODY ]] ], [ [[N_VEC ]], %[[MIDDLE_BLOCK ]] ]
231217; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
232218; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4
233219; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
@@ -265,17 +251,13 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
265251; CHECK-NEXT: [[ENTRY:.*]]:
266252; CHECK-NEXT: [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
267253; CHECK-NEXT: [[MUL1:%.*]] = mul nuw nsw i64 [[TMP0]], 64
268- ; CHECK-NEXT: [[TMP2:%.*]] = shl nuw nsw i64 [[TMP0]], 3
269- ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[MUL1]], [[TMP2]]
270- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
271- ; CHECK: [[VECTOR_PH]]:
272254; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
273255; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8
274256; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[MUL1]], [[TMP4]]
275257; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[MUL1]], [[N_MOD_VF]]
276258; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
277259; CHECK: [[VECTOR_BODY]]:
278- ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH ]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
260+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[ENTRY ]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
279261; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX]]
280262; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
281263; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i64 [[TMP9]], 2
@@ -297,14 +279,11 @@ define void @vscale_mul_64(ptr noalias noundef readonly captures(none) %a, ptr n
297279; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
298280; CHECK: [[MIDDLE_BLOCK]]:
299281; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[MUL1]], [[N_VEC]]
300- ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[SCALAR_PH]]
301- ; CHECK: [[SCALAR_PH]]:
302- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
303- ; CHECK-NEXT: br label %[[FOR_BODY:.*]]
282+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP:.*]], label %[[FOR_BODY:.*]]
304283; CHECK: [[FOR_COND_CLEANUP]]:
305284; CHECK-NEXT: ret void
306285; CHECK: [[FOR_BODY]]:
307- ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ], [ [[INDVARS_IV_NEXT:%.* ]], %[[FOR_BODY ]] ]
286+ ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.* ]], %[[FOR_BODY ]] ], [ [[N_VEC ]], %[[MIDDLE_BLOCK ]] ]
308287; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDVARS_IV]]
309288; CHECK-NEXT: [[TMP23:%.*]] = load float, ptr [[ARRAYIDX]], align 4
310289; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds nuw float, ptr [[B]], i64 [[INDVARS_IV]]
0 commit comments