@@ -8,17 +8,51 @@ define i64 @pr97452_scalable_vf1_for_live_out(ptr %src) {
88; CHECK-LABEL: define i64 @pr97452_scalable_vf1_for_live_out(
99; CHECK-SAME: ptr [[SRC:%.*]]) {
1010; CHECK-NEXT: [[ENTRY:.*]]:
11+ ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
12+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]]
13+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
14+ ; CHECK: [[VECTOR_PH]]:
15+ ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
16+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]]
17+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]]
18+ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
19+ ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
20+ ; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1
21+ ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 0, i32 [[TMP4]]
22+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
23+ ; CHECK: [[VECTOR_BODY]]:
24+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
25+ ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 1 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], %[[VECTOR_BODY]] ]
26+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
27+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
28+ ; CHECK-NEXT: [[WIDE_LOAD]] = load <vscale x 1 x i64>, ptr [[TMP6]], align 8
29+ ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> [[VECTOR_RECUR]], <vscale x 1 x i64> [[WIDE_LOAD]], i32 -1)
30+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
31+ ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
32+ ; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
33+ ; CHECK: [[MIDDLE_BLOCK]]:
34+ ; CHECK-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32()
35+ ; CHECK-NEXT: [[TMP10:%.*]] = sub i32 [[TMP9]], 1
36+ ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <vscale x 1 x i64> [[TMP7]], i32 [[TMP10]]
37+ ; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.vscale.i32()
38+ ; CHECK-NEXT: [[TMP13:%.*]] = sub i32 [[TMP12]], 1
39+ ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 1 x i64> [[WIDE_LOAD]], i32 [[TMP13]]
40+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 23, [[N_VEC]]
41+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
42+ ; CHECK: [[SCALAR_PH]]:
43+ ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
44+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1145; CHECK-NEXT: br label %[[LOOP:.*]]
1246; CHECK: [[LOOP]]:
13- ; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[L:%.*]], %[[LOOP]] ]
14- ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
47+ ; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
48+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1549; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
1650; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
1751; CHECK-NEXT: [[L]] = load i64, ptr [[GEP]], align 8
1852; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 22
19- ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
53+ ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]]
2054; CHECK: [[EXIT]]:
21- ; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ]
55+ ; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[FOR]], %[[LOOP]] ], [ [[TMP11]], %[[MIDDLE_BLOCK]] ]
2256; CHECK-NEXT: ret i64 [[RES]]
2357;
2458entry:
@@ -43,17 +77,51 @@ define void @pr97452_scalable_vf1_for_no_live_out(ptr %src, ptr noalias %dst) {
4377; CHECK-LABEL: define void @pr97452_scalable_vf1_for_no_live_out(
4478; CHECK-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) {
4579; CHECK-NEXT: [[ENTRY:.*]]:
80+ ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
81+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 23, [[TMP0]]
82+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
83+ ; CHECK: [[VECTOR_PH]]:
84+ ; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
85+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 23, [[TMP1]]
86+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 23, [[N_MOD_VF]]
87+ ; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
88+ ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
89+ ; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1
90+ ; CHECK-NEXT: [[VECTOR_RECUR_INIT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 0, i32 [[TMP4]]
91+ ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
92+ ; CHECK: [[VECTOR_BODY]]:
93+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
94+ ; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <vscale x 1 x i64> [ [[VECTOR_RECUR_INIT]], %[[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], %[[VECTOR_BODY]] ]
95+ ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[INDEX]]
96+ ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
97+ ; CHECK-NEXT: [[WIDE_LOAD]] = load <vscale x 1 x i64>, ptr [[TMP6]], align 8
98+ ; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 1 x i64> @llvm.vector.splice.nxv1i64(<vscale x 1 x i64> [[VECTOR_RECUR]], <vscale x 1 x i64> [[WIDE_LOAD]], i32 -1)
99+ ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[INDEX]]
100+ ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[TMP8]], i32 0
101+ ; CHECK-NEXT: store <vscale x 1 x i64> [[TMP7]], ptr [[TMP9]], align 8
102+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP2]]
103+ ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
104+ ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
105+ ; CHECK: [[MIDDLE_BLOCK]]:
106+ ; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vscale.i32()
107+ ; CHECK-NEXT: [[TMP12:%.*]] = sub i32 [[TMP11]], 1
108+ ; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <vscale x 1 x i64> [[WIDE_LOAD]], i32 [[TMP12]]
109+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 23, [[N_VEC]]
110+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
111+ ; CHECK: [[SCALAR_PH]]:
112+ ; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
113+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
46114; CHECK-NEXT: br label %[[LOOP:.*]]
47115; CHECK: [[LOOP]]:
48- ; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[L:%.*]], %[[LOOP]] ]
49- ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
116+ ; CHECK-NEXT: [[FOR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[L:%.*]], %[[LOOP]] ]
117+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
50118; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
51119; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[SRC]], i64 [[IV]]
52120; CHECK-NEXT: [[L]] = load i64, ptr [[GEP]], align 8
53121; CHECK-NEXT: [[GEP_DST:%.*]] = getelementptr inbounds i64, ptr [[DST]], i64 [[IV]]
54122; CHECK-NEXT: store i64 [[FOR]], ptr [[GEP_DST]], align 8
55123; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], 22
56- ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT:.*]], label %[[LOOP]]
124+ ; CHECK-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP5:![0-9]+]]
57125; CHECK: [[EXIT]]:
58126; CHECK-NEXT: ret void
59127;
@@ -74,3 +142,11 @@ loop:
74142exit:
75143 ret void
76144}
145+ ;.
146+ ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
147+ ; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
148+ ; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
149+ ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
150+ ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
151+ ; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
152+ ;.
0 commit comments