@@ -737,12 +737,15 @@ exit:
737737 ret void
738738}
739739
740- define void @exit_cond_zext_iv (ptr %dst , i64 %N ) {
740+ define void @exit_cond_zext_iv (ptr %dst , i64 %N ) # 0 {
741741; DEFAULT-LABEL: define void @exit_cond_zext_iv(
742- ; DEFAULT-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
742+ ; DEFAULT-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
743743; DEFAULT-NEXT: [[ENTRY:.*]]:
744744; DEFAULT-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
745- ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], 2
745+ ; DEFAULT-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
746+ ; DEFAULT-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP7]], 8
747+ ; DEFAULT-NEXT: [[TMP8:%.*]] = call i64 @llvm.umax.i64(i64 20, i64 [[TMP1]])
748+ ; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[UMAX1]], [[TMP8]]
746749; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
747750; DEFAULT: [[VECTOR_SCEVCHECK]]:
748751; DEFAULT-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
@@ -754,18 +757,23 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
754757; DEFAULT-NEXT: [[TMP6:%.*]] = or i1 [[TMP4]], [[TMP5]]
755758; DEFAULT-NEXT: br i1 [[TMP6]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
756759; DEFAULT: [[VECTOR_PH]]:
757- ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], 2
760+ ; DEFAULT-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
761+ ; DEFAULT-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 8
762+ ; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[UMAX1]], [[TMP10]]
758763; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[UMAX1]], [[N_MOD_VF]]
764+ ; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
765+ ; DEFAULT-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP13]], 8
759766; DEFAULT-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32
760767; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
761768; DEFAULT: [[VECTOR_BODY]]:
762769; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
763- ; DEFAULT-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], 1
764- ; DEFAULT-NEXT: [[TMP9:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[INDEX]], i32 2
765- ; DEFAULT-NEXT: [[TMP10:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP8]], i32 2
766- ; DEFAULT-NEXT: store i32 0, ptr [[TMP9]], align 8
767- ; DEFAULT-NEXT: store i32 0, ptr [[TMP10]], align 8
768- ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
770+ ; DEFAULT-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]]
771+ ; DEFAULT-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
772+ ; DEFAULT-NEXT: [[TMP16:%.*]] = mul nuw i64 [[TMP15]], 4
773+ ; DEFAULT-NEXT: [[TMP17:%.*]] = getelementptr i32, ptr [[TMP14]], i64 [[TMP16]]
774+ ; DEFAULT-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[TMP14]], align 4
775+ ; DEFAULT-NEXT: store <vscale x 4 x i32> zeroinitializer, ptr [[TMP17]], align 4
776+ ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP12]]
769777; DEFAULT-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
770778; DEFAULT-NEXT: br i1 [[TMP11]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
771779; DEFAULT: [[MIDDLE_BLOCK]]:
@@ -778,8 +786,8 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
778786; DEFAULT: [[LOOP]]:
779787; DEFAULT-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
780788; DEFAULT-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
781- ; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[IV_CONV]], i32 2
782- ; DEFAULT-NEXT: store i32 0, ptr [[GEP]], align 8
789+ ; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV_CONV]]
790+ ; DEFAULT-NEXT: store i32 0, ptr [[GEP]], align 4
783791; DEFAULT-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
784792; DEFAULT-NEXT: [[IV_EXT]] = zext i32 [[IV_1_NEXT]] to i64
785793; DEFAULT-NEXT: [[C:%.*]] = icmp ult i64 [[IV_EXT]], [[N]]
@@ -788,63 +796,56 @@ define void @exit_cond_zext_iv(ptr %dst, i64 %N) {
788796; DEFAULT-NEXT: ret void
789797;
790798; PRED-LABEL: define void @exit_cond_zext_iv(
791- ; PRED-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) {
799+ ; PRED-SAME: ptr [[DST:%.*]], i64 [[N:%.*]]) #[[ATTR0]] {
792800; PRED-NEXT: [[ENTRY:.*]]:
793801; PRED-NEXT: [[UMAX1:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
794802; PRED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
795803; PRED: [[VECTOR_SCEVCHECK]]:
796804; PRED-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[N]], i64 1)
797805; PRED-NEXT: [[TMP0:%.*]] = add i64 [[UMAX]], -1
798- ; PRED-NEXT: [[TMP2 :%.*]] = trunc i64 [[TMP0]] to i32
799- ; PRED-NEXT: [[TMP3 :%.*]] = add i32 1, [[TMP2 ]]
800- ; PRED-NEXT: [[TMP4 :%.*]] = icmp ult i32 [[TMP3 ]], 1
801- ; PRED-NEXT: [[TMP5 :%.*]] = icmp ugt i64 [[TMP0]], 4294967295
802- ; PRED-NEXT: [[TMP6 :%.*]] = or i1 [[TMP4 ]], [[TMP5 ]]
803- ; PRED-NEXT: br i1 [[TMP6 ]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
806+ ; PRED-NEXT: [[TMP1 :%.*]] = trunc i64 [[TMP0]] to i32
807+ ; PRED-NEXT: [[TMP2 :%.*]] = add i32 1, [[TMP1 ]]
808+ ; PRED-NEXT: [[TMP3 :%.*]] = icmp ult i32 [[TMP2 ]], 1
809+ ; PRED-NEXT: [[TMP4 :%.*]] = icmp ugt i64 [[TMP0]], 4294967295
810+ ; PRED-NEXT: [[TMP5 :%.*]] = or i1 [[TMP3 ]], [[TMP4 ]]
811+ ; PRED-NEXT: br i1 [[TMP5 ]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
804812; PRED: [[VECTOR_PH]]:
805- ; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], 1
806- ; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 2
813+ ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
814+ ; PRED-NEXT: [[TMP7:%.*]] = mul nuw i64 [[TMP6]], 4
815+ ; PRED-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1
816+ ; PRED-NEXT: [[N_RND_UP:%.*]] = add i64 [[UMAX1]], [[TMP8]]
817+ ; PRED-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP7]]
807818; PRED-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]]
808- ; PRED-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[UMAX1]], 1
809- ; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0
810- ; PRED-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
819+ ; PRED-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
820+ ; PRED-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
821+ ; PRED-NEXT: [[TMP11:%.*]] = call i64 @llvm.vscale.i64()
822+ ; PRED-NEXT: [[TMP12:%.*]] = mul nuw i64 [[TMP11]], 4
823+ ; PRED-NEXT: [[TMP13:%.*]] = sub i64 [[UMAX1]], [[TMP12]]
824+ ; PRED-NEXT: [[TMP14:%.*]] = icmp ugt i64 [[UMAX1]], [[TMP12]]
825+ ; PRED-NEXT: [[TMP15:%.*]] = select i1 [[TMP14]], i64 [[TMP13]], i64 0
826+ ; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 [[UMAX1]])
811827; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
812828; PRED: [[VECTOR_BODY]]:
813- ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE5:.*]] ]
814- ; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[INDEX]], i64 0
815- ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
816- ; PRED-NEXT: [[VEC_IV:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1>
817- ; PRED-NEXT: [[TMP7:%.*]] = icmp ule <2 x i64> [[VEC_IV]], [[BROADCAST_SPLAT3]]
818- ; PRED-NEXT: [[TMP8:%.*]] = extractelement <2 x i1> [[TMP7]], i32 0
819- ; PRED-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
820- ; PRED: [[PRED_STORE_IF]]:
821- ; PRED-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 0
822- ; PRED-NEXT: [[TMP10:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP9]], i32 2
823- ; PRED-NEXT: store i32 0, ptr [[TMP10]], align 8
824- ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]]
825- ; PRED: [[PRED_STORE_CONTINUE]]:
826- ; PRED-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP7]], i32 1
827- ; PRED-NEXT: br i1 [[TMP11]], label %[[PRED_STORE_IF4:.*]], label %[[PRED_STORE_CONTINUE5]]
828- ; PRED: [[PRED_STORE_IF4]]:
829- ; PRED-NEXT: [[TMP12:%.*]] = add i64 [[INDEX]], 1
830- ; PRED-NEXT: [[TMP13:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[TMP12]], i32 2
831- ; PRED-NEXT: store i32 0, ptr [[TMP13]], align 8
832- ; PRED-NEXT: br label %[[PRED_STORE_CONTINUE5]]
833- ; PRED: [[PRED_STORE_CONTINUE5]]:
834- ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
835- ; PRED-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
836- ; PRED-NEXT: br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
829+ ; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
830+ ; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
831+ ; PRED-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST]], i64 [[INDEX]]
832+ ; PRED-NEXT: call void @llvm.masked.store.nxv4i32.p0(<vscale x 4 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
833+ ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
834+ ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP15]])
835+ ; PRED-NEXT: [[TMP17:%.*]] = xor <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], splat (i1 true)
836+ ; PRED-NEXT: [[TMP18:%.*]] = extractelement <vscale x 4 x i1> [[TMP17]], i32 0
837+ ; PRED-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
837838; PRED: [[MIDDLE_BLOCK]]:
838839; PRED-NEXT: br label %[[EXIT:.*]]
839840; PRED: [[SCALAR_PH]]:
840841; PRED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
841- ; PRED-NEXT: [[BC_RESUME_VAL6 :%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
842+ ; PRED-NEXT: [[BC_RESUME_VAL2 :%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
842843; PRED-NEXT: br label %[[LOOP:.*]]
843844; PRED: [[LOOP]]:
844845; PRED-NEXT: [[IV_1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
845- ; PRED-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL6 ]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
846- ; PRED-NEXT: [[GEP:%.*]] = getelementptr { [100 x i32], i32, i32 }, ptr [[DST]], i64 [[IV_CONV]], i32 2
847- ; PRED-NEXT: store i32 0, ptr [[GEP]], align 8
846+ ; PRED-NEXT: [[IV_CONV:%.*]] = phi i64 [ [[BC_RESUME_VAL2 ]], %[[SCALAR_PH]] ], [ [[IV_EXT:%.*]], %[[LOOP]] ]
847+ ; PRED-NEXT: [[GEP:%.*]] = getelementptr i32, ptr [[DST]], i64 [[IV_CONV]]
848+ ; PRED-NEXT: store i32 0, ptr [[GEP]], align 4
848849; PRED-NEXT: [[IV_1_NEXT]] = add i32 [[IV_1]], 1
849850; PRED-NEXT: [[IV_EXT]] = zext i32 [[IV_1_NEXT]] to i64
850851; PRED-NEXT: [[C:%.*]] = icmp ult i64 [[IV_EXT]], [[N]]
@@ -858,8 +859,8 @@ entry:
858859loop:
859860 %iv.1 = phi i32 [ 0 , %entry ], [ %iv.1.next , %loop ]
860861 %iv.conv = phi i64 [ 0 , %entry ], [ %iv.ext , %loop ]
861- %gep = getelementptr {[ 100 x i32 ], i32 , i32 }, ptr %dst , i64 %iv.conv , i32 2
862- store i32 0 , ptr %gep , align 8
862+ %gep = getelementptr i32 , ptr %dst , i64 %iv.conv
863+ store i32 0 , ptr %gep , align 4
863864 %iv.1.next = add i32 %iv.1 , 1
864865 %iv.ext = zext i32 %iv.1.next to i64
865866 %c = icmp ult i64 %iv.ext , %N
0 commit comments