@@ -1211,6 +1211,130 @@ exit:
12111211 ret i32 %or
12121212}
12131213
1214+ ; Check that the VPlan-based cost model selects the same VF as the legacy cost model.
1215+ ; Reduced from: https://github.com/llvm/llvm-project/issues/115744#issuecomment-2670479463
1216+ define i32 @g (i64 %n ) {
1217+ ; CHECK-LABEL: @g(
1218+ ; CHECK-NEXT: iter.check:
1219+ ; CHECK-NEXT: [[TMP0:%.*]] = trunc i64 [[N:%.*]] to i32
1220+ ; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 1
1221+ ; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 4
1222+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
1223+ ; CHECK: vector.scevcheck:
1224+ ; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt i64 [[N]], 4294967295
1225+ ; CHECK-NEXT: br i1 [[TMP2]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
1226+ ; CHECK: vector.main.loop.iter.check:
1227+ ; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i32 [[TMP1]], 16
1228+ ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
1229+ ; CHECK: vector.ph:
1230+ ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 16
1231+ ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
1232+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
1233+ ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
1234+ ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
1235+ ; CHECK: vector.body:
1236+ ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1237+ ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1238+ ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
1239+ ; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
1240+ ; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ]
1241+ ; CHECK-NEXT: [[VEC_PHI4:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
1242+ ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
1243+ ; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
1244+ ; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4)
1245+ ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i32> [[VEC_IND]] to <4 x i64>
1246+ ; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i32> [[STEP_ADD]] to <4 x i64>
1247+ ; CHECK-NEXT: [[TMP5:%.*]] = zext <4 x i32> [[STEP_ADD_2]] to <4 x i64>
1248+ ; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i32> [[STEP_ADD_3]] to <4 x i64>
1249+ ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq <4 x i64> [[BROADCAST_SPLAT]], [[TMP3]]
1250+ ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i64> [[BROADCAST_SPLAT]], [[TMP4]]
1251+ ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i64> [[BROADCAST_SPLAT]], [[TMP5]]
1252+ ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <4 x i64> [[BROADCAST_SPLAT]], [[TMP6]]
1253+ ; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 2)
1254+ ; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 2)
1255+ ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 2)
1256+ ; CHECK-NEXT: [[TMP14:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 2)
1257+ ; CHECK-NEXT: [[TMP15]] = or <4 x i32> [[TMP11]], [[VEC_PHI]]
1258+ ; CHECK-NEXT: [[TMP16]] = or <4 x i32> [[TMP12]], [[VEC_PHI2]]
1259+ ; CHECK-NEXT: [[TMP17]] = or <4 x i32> [[TMP13]], [[VEC_PHI3]]
1260+ ; CHECK-NEXT: [[TMP18]] = or <4 x i32> [[TMP14]], [[VEC_PHI4]]
1261+ ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16
1262+ ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4)
1263+ ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
1264+ ; CHECK-NEXT: br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
1265+ ; CHECK: middle.block:
1266+ ; CHECK-NEXT: [[BIN_RDX:%.*]] = or <4 x i32> [[TMP16]], [[TMP15]]
1267+ ; CHECK-NEXT: [[BIN_RDX5:%.*]] = or <4 x i32> [[TMP17]], [[BIN_RDX]]
1268+ ; CHECK-NEXT: [[BIN_RDX6:%.*]] = or <4 x i32> [[TMP18]], [[BIN_RDX5]]
1269+ ; CHECK-NEXT: [[TMP20:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[BIN_RDX6]])
1270+ ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
1271+ ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
1272+ ; CHECK: vec.epilog.iter.check:
1273+ ; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i32 [[TMP1]], [[N_VEC]]
1274+ ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i32 [[N_VEC_REMAINING]], 4
1275+ ; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]]
1276+ ; CHECK: vec.epilog.ph:
1277+ ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
1278+ ; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP20]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
1279+ ; CHECK-NEXT: [[N_MOD_VF7:%.*]] = urem i32 [[TMP1]], 4
1280+ ; CHECK-NEXT: [[N_VEC8:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF7]]
1281+ ; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0
1282+ ; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1283+ ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
1284+ ; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[BC_MERGE_RDX]], i32 0
1285+ ; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x i64> poison, i64 [[N]], i64 0
1286+ ; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT13]], <4 x i64> poison, <4 x i32> zeroinitializer
1287+ ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
1288+ ; CHECK: vec.epilog.vector.body:
1289+ ; CHECK-NEXT: [[INDEX9:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
1290+ ; CHECK-NEXT: [[VEC_IND10:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
1291+ ; CHECK-NEXT: [[VEC_PHI12:%.*]] = phi <4 x i32> [ [[TMP21]], [[VEC_EPILOG_PH]] ], [ [[TMP25:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
1292+ ; CHECK-NEXT: [[TMP22:%.*]] = zext <4 x i32> [[VEC_IND10]] to <4 x i64>
1293+ ; CHECK-NEXT: [[TMP23:%.*]] = icmp eq <4 x i64> [[BROADCAST_SPLAT14]], [[TMP22]]
1294+ ; CHECK-NEXT: [[TMP24:%.*]] = select <4 x i1> [[TMP23]], <4 x i32> zeroinitializer, <4 x i32> splat (i32 2)
1295+ ; CHECK-NEXT: [[TMP25]] = or <4 x i32> [[TMP24]], [[VEC_PHI12]]
1296+ ; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i32 [[INDEX9]], 4
1297+ ; CHECK-NEXT: [[VEC_IND_NEXT11]] = add <4 x i32> [[VEC_IND10]], splat (i32 4)
1298+ ; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT15]], [[N_VEC8]]
1299+ ; CHECK-NEXT: br i1 [[TMP26]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
1300+ ; CHECK: vec.epilog.middle.block:
1301+ ; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[TMP25]])
1302+ ; CHECK-NEXT: [[CMP_N16:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC8]]
1303+ ; CHECK-NEXT: br i1 [[CMP_N16]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
1304+ ; CHECK: vec.epilog.scalar.ph:
1305+ ; CHECK-NEXT: [[BC_RESUME_VAL17:%.*]] = phi i32 [ [[N_VEC8]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK:%.*]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ]
1306+ ; CHECK-NEXT: [[BC_MERGE_RDX18:%.*]] = phi i32 [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ITER_CHECK]] ], [ [[TMP20]], [[VEC_EPILOG_ITER_CHECK]] ]
1307+ ; CHECK-NEXT: br label [[LOOP:%.*]]
1308+ ; CHECK: loop:
1309+ ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL17]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
1310+ ; CHECK-NEXT: [[SELECT:%.*]] = phi i32 [ [[BC_MERGE_RDX18]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SELECT_NEXT:%.*]], [[LOOP]] ]
1311+ ; CHECK-NEXT: [[IV_WIDEN:%.*]] = zext i32 [[IV]] to i64
1312+ ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[N]], [[IV_WIDEN]]
1313+ ; CHECK-NEXT: [[SELECT_I:%.*]] = select i1 [[EXITCOND]], i32 0, i32 2
1314+ ; CHECK-NEXT: [[SELECT_NEXT]] = or i32 [[SELECT_I]], [[SELECT]]
1315+ ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
1316+ ; CHECK-NEXT: br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP32:![0-9]+]]
1317+ ; CHECK: exit:
1318+ ; CHECK-NEXT: [[SELECT_NEXT_LCSSA:%.*]] = phi i32 [ [[SELECT_NEXT]], [[LOOP]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ [[TMP27]], [[VEC_EPILOG_MIDDLE_BLOCK]] ]
1319+ ; CHECK-NEXT: ret i32 [[SELECT_NEXT_LCSSA]]
1320+ ;
; Scalar input loop. The expected output above consists of a main vector loop
; over <4 x i32> with four accumulator phis that advances its index by 16, an
; epilogue vector loop over <4 x i32> that advances its index by 4, and a
; scalar remainder loop.
1321+ entry:
1322+ br label %loop
1323+
1324+ loop:
; %iv is an i32 counter starting at 0; %select carries the running OR and also
; starts at 0.
1325+ %iv = phi i32 [ 0 , %entry ], [ %iv.next , %loop ]
1326+ %select = phi i32 [ 0 , %entry ], [ %select.next , %loop ]
; Widen the i32 counter so it can be compared against the i64 argument %n.
1327+ %iv.widen = zext i32 %iv to i64
; True once the widened counter equals %n; the same value drives the loop exit.
1328+ %exitcond = icmp eq i64 %n , %iv.widen
; Yields 0 on the exiting iteration and 2 on every other iteration.
1329+ %select.i = select i1 %exitcond , i32 0 , i32 2
; Fold this iteration's value into the accumulator.
1330+ %select.next = or i32 %select.i , %select
; Plain i32 increment; the add carries no nuw/nsw flags.
1331+ %iv.next = add i32 %iv , 1
1332+ br i1 %exitcond , label %exit , label %loop
1333+
1334+ exit:
; The result is the accumulator value computed in the final iteration.
1335+ ret i32 %select.next
1336+ }
1337+
; Declaration of the llvm.assume intrinsic, tagged with attribute group #0.
12141338declare void @llvm.assume (i1 noundef) #0
12151339
; Attribute group #0 sets the "target-cpu" string attribute to "penryn".
12161340attributes #0 = { "target-cpu" ="penryn" }
0 commit comments