@@ -7,20 +7,22 @@ target triple = "powerpc64le-unknown-linux-gnu"
77define i1 @select_exit_cond (ptr %start , ptr %end , i64 %N ) {
88; CHECK-LABEL: define i1 @select_exit_cond(
99; CHECK-SAME: ptr [[START:%.*]], ptr [[END:%.*]], i64 [[N:%.*]]) {
10- ; CHECK-NEXT: [[ENTRY :.*]]:
10+ ; CHECK-NEXT: [[ITER_CHECK :.*]]:
1111; CHECK-NEXT: [[START2:%.*]] = ptrtoint ptr [[START]] to i64
1212; CHECK-NEXT: [[END1:%.*]] = ptrtoint ptr [[END]] to i64
1313; CHECK-NEXT: [[TMP0:%.*]] = freeze i64 [[N]]
1414; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[END1]], i64 [[START2]])
1515; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[UMAX]], [[START2]]
1616; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 [[TMP1]])
1717; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[UMIN]], 1
18+ ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP2]], 2
19+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
20+ ; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
1821; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 16
19- ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH :.*]], label %[[VECTOR_PH:.*]]
22+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_PH :.*]], label %[[VECTOR_PH:.*]]
2023; CHECK: [[VECTOR_PH]]:
2124; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
2225; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
23- ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
2426; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2527; CHECK: [[VECTOR_BODY]]:
2628; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -96,24 +98,61 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
9698; CHECK-NEXT: br i1 [[TMP51]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
9799; CHECK: [[MIDDLE_BLOCK]]:
98100; CHECK-NEXT: [[BIN_RDX:%.*]] = or <2 x i64> [[TMP44]], [[TMP43]]
99- ; CHECK-NEXT: [[BIN_RDX32 :%.*]] = or <2 x i64> [[TMP45]], [[BIN_RDX]]
100- ; CHECK-NEXT: [[BIN_RDX33 :%.*]] = or <2 x i64> [[TMP46]], [[BIN_RDX32 ]]
101- ; CHECK-NEXT: [[BIN_RDX34 :%.*]] = or <2 x i64> [[TMP47]], [[BIN_RDX33 ]]
102- ; CHECK-NEXT: [[BIN_RDX35 :%.*]] = or <2 x i64> [[TMP48]], [[BIN_RDX34 ]]
103- ; CHECK-NEXT: [[BIN_RDX36 :%.*]] = or <2 x i64> [[TMP49]], [[BIN_RDX35 ]]
104- ; CHECK-NEXT: [[BIN_RDX37:%.*]] = or <2 x i64> [[TMP50]], [[BIN_RDX36 ]]
101+ ; CHECK-NEXT: [[BIN_RDX18 :%.*]] = or <2 x i64> [[TMP45]], [[BIN_RDX]]
102+ ; CHECK-NEXT: [[BIN_RDX19 :%.*]] = or <2 x i64> [[TMP46]], [[BIN_RDX18 ]]
103+ ; CHECK-NEXT: [[BIN_RDX20 :%.*]] = or <2 x i64> [[TMP47]], [[BIN_RDX19 ]]
104+ ; CHECK-NEXT: [[BIN_RDX21 :%.*]] = or <2 x i64> [[TMP48]], [[BIN_RDX20 ]]
105+ ; CHECK-NEXT: [[BIN_RDX22 :%.*]] = or <2 x i64> [[TMP49]], [[BIN_RDX21 ]]
106+ ; CHECK-NEXT: [[BIN_RDX37:%.*]] = or <2 x i64> [[TMP50]], [[BIN_RDX22 ]]
105107; CHECK-NEXT: [[TMP52:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[BIN_RDX37]])
106108; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
107- ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
108- ; CHECK: [[SCALAR_PH]]:
109- ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
110- ; CHECK-NEXT: [[BC_RESUME_VAL3:%.*]] = phi ptr [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START]], %[[ENTRY]] ]
111- ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP52]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
109+ ; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
110+ ; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
111+ ; CHECK-NEXT: [[IND_END27:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC]]
112+ ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP2]], [[N_VEC]]
113+ ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 2
114+ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]]
115+ ; CHECK: [[VEC_EPILOG_PH]]:
116+ ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
117+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
118+ ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
119+ ; CHECK-NEXT: [[N_MOD_VF24:%.*]] = urem i64 [[TMP2]], 2
120+ ; CHECK-NEXT: [[N_VEC25:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF24]]
121+ ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[START]], i64 [[N_VEC25]]
122+ ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[BC_RESUME_VAL]], i64 0
123+ ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <2 x i64> [[DOTSPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
124+ ; CHECK-NEXT: [[INDUCTION:%.*]] = add <2 x i64> [[DOTSPLAT]], <i64 0, i64 1>
125+ ; CHECK-NEXT: [[TMP55:%.*]] = insertelement <2 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
126+ ; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
127+ ; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
128+ ; CHECK-NEXT: [[INDEX29:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT35:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
129+ ; CHECK-NEXT: [[VEC_IND30:%.*]] = phi <2 x i64> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT31:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
130+ ; CHECK-NEXT: [[VEC_PHI32:%.*]] = phi <2 x i64> [ [[TMP55]], %[[VEC_EPILOG_PH]] ], [ [[TMP56:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
131+ ; CHECK-NEXT: [[TMP57:%.*]] = add i64 [[INDEX29]], 0
132+ ; CHECK-NEXT: [[NEXT_GEP33:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP57]]
133+ ; CHECK-NEXT: [[TMP58:%.*]] = getelementptr i8, ptr [[NEXT_GEP33]], i32 0
134+ ; CHECK-NEXT: [[WIDE_LOAD34:%.*]] = load <2 x i8>, ptr [[TMP58]], align 1
135+ ; CHECK-NEXT: [[TMP59:%.*]] = zext <2 x i8> [[WIDE_LOAD34]] to <2 x i64>
136+ ; CHECK-NEXT: [[TMP60:%.*]] = shl <2 x i64> [[VEC_IND30]], splat (i64 1)
137+ ; CHECK-NEXT: [[TMP61:%.*]] = shl <2 x i64> [[TMP59]], [[TMP60]]
138+ ; CHECK-NEXT: [[TMP56]] = or <2 x i64> [[TMP61]], [[VEC_PHI32]]
139+ ; CHECK-NEXT: [[INDEX_NEXT35]] = add nuw i64 [[INDEX29]], 2
140+ ; CHECK-NEXT: [[VEC_IND_NEXT31]] = add <2 x i64> [[VEC_IND30]], splat (i64 2)
141+ ; CHECK-NEXT: [[TMP62:%.*]] = icmp eq i64 [[INDEX_NEXT35]], [[N_VEC25]]
142+ ; CHECK-NEXT: br i1 [[TMP62]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
143+ ; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
144+ ; CHECK-NEXT: [[TMP54:%.*]] = call i64 @llvm.vector.reduce.or.v2i64(<2 x i64> [[TMP56]])
145+ ; CHECK-NEXT: [[CMP_N36:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC25]]
146+ ; CHECK-NEXT: br i1 [[CMP_N36]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
147+ ; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
148+ ; CHECK-NEXT: [[BC_RESUME_VAL26:%.*]] = phi i64 [ [[N_VEC25]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
149+ ; CHECK-NEXT: [[BC_RESUME_VAL28:%.*]] = phi ptr [ [[IND_END]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END27]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
150+ ; CHECK-NEXT: [[BC_MERGE_RDX37:%.*]] = phi i64 [ [[TMP54]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, %[[ITER_CHECK]] ], [ [[TMP52]], %[[VEC_EPILOG_ITER_CHECK]] ]
112151; CHECK-NEXT: br label %[[LOOP:.*]]
113152; CHECK: [[LOOP]]:
114- ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL ]], %[[SCALAR_PH ]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
115- ; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX ]], %[[SCALAR_PH ]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
116- ; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL3 ]], %[[SCALAR_PH ]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
153+ ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL26 ]], %[[VEC_EPILOG_SCALAR_PH ]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
154+ ; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[BC_MERGE_RDX37 ]], %[[VEC_EPILOG_SCALAR_PH ]] ], [ [[RED_NEXT:%.*]], %[[LOOP]] ]
155+ ; CHECK-NEXT: [[PTR_IV:%.*]] = phi ptr [ [[BC_RESUME_VAL28 ]], %[[VEC_EPILOG_SCALAR_PH ]] ], [ [[PTR_IV_NEXT:%.*]], %[[LOOP]] ]
117156; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[PTR_IV]], align 1
118157; CHECK-NEXT: [[CONV3:%.*]] = zext i8 [[TMP53]] to i64
119158; CHECK-NEXT: [[MUL:%.*]] = shl i64 [[IV]], 1
@@ -124,9 +163,9 @@ define i1 @select_exit_cond(ptr %start, ptr %end, i64 %N) {
124163; CHECK-NEXT: [[CMP_I166_I:%.*]] = icmp ult ptr [[PTR_IV]], [[END]]
125164; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i64 [[IV]], [[N]]
126165; CHECK-NEXT: [[AND:%.*]] = select i1 [[CMP_I166_I]], i1 [[CMP2]], i1 false
127- ; CHECK-NEXT: br i1 [[AND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP3 :![0-9]+]]
166+ ; CHECK-NEXT: br i1 [[AND]], label %[[LOOP]], label %[[EXIT]], !llvm.loop [[LOOP4 :![0-9]+]]
128167; CHECK: [[EXIT]]:
129- ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP52]], %[[MIDDLE_BLOCK]] ]
168+ ; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], %[[LOOP]] ], [ [[TMP52]], %[[MIDDLE_BLOCK]] ], [ [[TMP54]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ]
130169; CHECK-NEXT: [[RES:%.*]] = icmp eq i64 [[RED_NEXT_LCSSA]], 0
131170; CHECK-NEXT: ret i1 [[RES]]
132171;
@@ -157,5 +196,6 @@ exit:
157196; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
158197; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
159198; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
160- ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
199+ ; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
200+ ; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]}
161201;.
0 commit comments