@@ -154,6 +154,24 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
154154; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
155155; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
156156; CHECK-NEXT: br label [[VECTOR_PH:%.*]]
157+ ; CHECK: vector.scevcheck:
158+ ; CHECK-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 4)
159+ ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[UMAX1]], -1
160+ ; CHECK-NEXT: [[TMP16:%.*]] = lshr i64 [[TMP14]], 2
161+ ; CHECK-NEXT: [[MUL:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP16]])
162+ ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i64, i1 } [[MUL]], 0
163+ ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i64, i1 } [[MUL]], 1
164+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[MUL_RESULT]]
165+ ; CHECK-NEXT: [[TMP6:%.*]] = icmp ult ptr [[TMP5]], [[SRC]]
166+ ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[MUL_OVERFLOW]]
167+ ; CHECK-NEXT: [[MUL1:%.*]] = call { i64, i1 } @llvm.umul.with.overflow.i64(i64 16, i64 [[TMP16]])
168+ ; CHECK-NEXT: [[MUL_RESULT2:%.*]] = extractvalue { i64, i1 } [[MUL1]], 0
169+ ; CHECK-NEXT: [[MUL_OVERFLOW3:%.*]] = extractvalue { i64, i1 } [[MUL1]], 1
170+ ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[DST:%.*]], i64 [[MUL_RESULT2]]
171+ ; CHECK-NEXT: [[TMP23:%.*]] = icmp ult ptr [[TMP17]], [[DST]]
172+ ; CHECK-NEXT: [[TMP24:%.*]] = or i1 [[TMP23]], [[MUL_OVERFLOW3]]
173+ ; CHECK-NEXT: [[TMP25:%.*]] = or i1 [[TMP7]], [[TMP24]]
174+ ; CHECK-NEXT: br i1 [[TMP25]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH1:%.*]]
157175; CHECK: vector.ph:
158176; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vscale.i64()
159177; CHECK-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 4
@@ -171,12 +189,12 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
171189; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[DOTSPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
172190; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
173191; CHECK: vector.body:
174- ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH ]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
175- ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH ]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
176- ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH ]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
177- ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[SRC:%.* ]], <vscale x 4 x i64> [[VEC_IND]]
192+ ; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH1 ]] ], [ [[INDEX_NEXT2:%.*]], [[VECTOR_BODY]] ]
193+ ; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[VECTOR_PH1 ]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[VECTOR_BODY]] ]
194+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i64> [ [[INDUCTION]], [[VECTOR_PH1 ]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
195+ ; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[SRC]], <vscale x 4 x i64> [[VEC_IND]]
178196; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32.nxv4p0(<vscale x 4 x ptr> align 4 [[TMP19]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]], <vscale x 4 x i32> poison)
179- ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST:%.* ]], <vscale x 4 x i64> [[VEC_IND]]
197+ ; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 4 x i64> [[VEC_IND]]
180198; CHECK-NEXT: call void @llvm.masked.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[WIDE_MASKED_GATHER]], <vscale x 4 x ptr> align 4 [[TMP20]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
181199; CHECK-NEXT: [[INDEX_NEXT2]] = add i64 [[INDEX1]], [[TMP4]]
182200; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP12]])
@@ -186,8 +204,7 @@ define void @copy_stride4(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
186204; CHECK-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
187205; CHECK: middle.block:
188206; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]]
189- ; CHECK: while.end.loopexit:
190- ; CHECK-NEXT: ret void
207+ ; CHECK: scalar.ph:
191208;
192209entry:
193210 br label %while.body
@@ -235,7 +252,7 @@ define void @simple_gather_scatter(ptr noalias %dst, ptr noalias %src, ptr noali
235252; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]])
236253; CHECK-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
237254; CHECK-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true
238- ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6 :![0-9]+]]
255+ ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7 :![0-9]+]]
239256; CHECK: middle.block:
240257; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]]
241258; CHECK: while.end.loopexit:
@@ -289,7 +306,7 @@ define void @uniform_load(ptr noalias %dst, ptr noalias readonly %src, i64 %n) #
289306; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]])
290307; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
291308; CHECK-NEXT: [[TMP13:%.*]] = xor i1 [[TMP14]], true
292- ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7 :![0-9]+]]
309+ ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8 :![0-9]+]]
293310; CHECK: middle.block:
294311; CHECK-NEXT: br label [[FOR_END:%.*]]
295312; CHECK: for.end:
@@ -348,7 +365,7 @@ define void @cond_uniform_load(ptr noalias %dst, ptr noalias readonly %src, ptr
348365; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]])
349366; CHECK-NEXT: [[TMP17:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
350367; CHECK-NEXT: [[TMP18:%.*]] = xor i1 [[TMP17]], true
351- ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8 :![0-9]+]]
368+ ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9 :![0-9]+]]
352369; CHECK: middle.block:
353370; CHECK-NEXT: br label [[FOR_END:%.*]]
354371; CHECK: for.end:
@@ -410,7 +427,7 @@ define void @uniform_store(ptr noalias %dst, ptr noalias readonly %src, i64 %n)
410427; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP9]])
411428; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
412429; CHECK-NEXT: [[TMP12:%.*]] = xor i1 [[TMP13]], true
413- ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9 :![0-9]+]]
430+ ; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10 :![0-9]+]]
414431; CHECK: middle.block:
415432; CHECK-NEXT: br label [[FOR_END:%.*]]
416433; CHECK: for.end:
@@ -462,7 +479,7 @@ define void @simple_fdiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
462479; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]])
463480; CHECK-NEXT: [[TMP13:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
464481; CHECK-NEXT: [[TMP14:%.*]] = xor i1 [[TMP13]], true
465- ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10 :![0-9]+]]
482+ ; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11 :![0-9]+]]
466483; CHECK: middle.block:
467484; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]]
468485; CHECK: while.end.loopexit:
@@ -518,7 +535,7 @@ define void @simple_idiv(ptr noalias %dst, ptr noalias %src, i64 %n) #0 {
518535; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX1]], i64 [[TMP9]])
519536; CHECK-NEXT: [[TMP14:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
520537; CHECK-NEXT: [[TMP17:%.*]] = xor i1 [[TMP14]], true
521- ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11 :![0-9]+]]
538+ ; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12 :![0-9]+]]
522539; CHECK: middle.block:
523540; CHECK-NEXT: br label [[WHILE_END_LOOPEXIT:%.*]]
524541; CHECK: while.end.loopexit:
@@ -561,7 +578,7 @@ define void @simple_memset_trip1024(i32 %val, ptr %ptr, i64 %n) #0 {
561578; CHECK-NEXT: store <vscale x 4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP7]], align 4
562579; CHECK-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], [[TMP3]]
563580; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT2]], [[N_VEC]]
564- ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12 :![0-9]+]]
581+ ; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13 :![0-9]+]]
565582; CHECK: middle.block:
566583; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
567584; CHECK-NEXT: br i1 [[CMP_N]], label [[WHILE_END_LOOPEXIT:%.*]], label [[SCALAR_PH:%.*]]
0 commit comments