@@ -1218,6 +1218,133 @@ exit:
1218
1218
ret void
1219
1219
}
1220
1220
1221
; Test: loop whose exit compare is against the constant 17.
;
; The input IR (at the bottom of this function) runs 17 iterations: %iv starts
; at 0, is incremented by 1, and the loop exits once %iv.next equals 17. Each
; iteration loads one i8 through %p.src, adds 10 to it, stores it back to the
; same address, and advances the pointer by one byte.
;
; For every check prefix the expected output has a vector loop that exits when
; its index reaches 16, followed by a scalar loop that resumes at index 16 (and
; at %A + 16) and still compares against 17, i.e. it covers the one remaining
; iteration.
;
; In the two configurations whose index step is 16, the vector latch compare
; (index.next == 16) already holds after the first iteration, so the backedge
; is never taken; the expected output nevertheless still contains the index
; phi and the conditional latch branch.
;
; NOTE(review): "tc_eq_16" presumably refers to the vector trip count rather
; than the scalar one (which is 17) - confirm against the intent of the test.
; NOTE(review): the check lines look autogenerated; presumably they should be
; regenerated by the script named in the file header rather than edited by
; hand - confirm.
+ define void @test_vector_tc_eq_16 (ptr %A ) {
1222
+ ; VF8UF1-LABEL: define void @test_vector_tc_eq_16(
1223
+ ; VF8UF1-SAME: ptr [[A:%.*]]) {
1224
+ ; VF8UF1-NEXT: [[ENTRY:.*]]:
1225
+ ; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1226
+ ; VF8UF1: [[VECTOR_PH]]:
1227
+ ; VF8UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
1228
+ ; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
1229
+ ; VF8UF1: [[VECTOR_BODY]]:
1230
+ ; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1231
+ ; VF8UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
1232
+ ; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1
1233
+ ; VF8UF1-NEXT: [[TMP1:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
1234
+ ; VF8UF1-NEXT: store <8 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1
1235
+ ; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1236
+ ; VF8UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
1237
+ ; VF8UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
1238
+ ; VF8UF1: [[MIDDLE_BLOCK]]:
1239
+ ; VF8UF1-NEXT: br label %[[SCALAR_PH]]
1240
+ ; VF8UF1: [[SCALAR_PH]]:
1241
+ ; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1242
+ ; VF8UF1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
1243
+ ; VF8UF1-NEXT: br label %[[LOOP:.*]]
1244
+ ; VF8UF1: [[LOOP]]:
1245
+ ; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1246
+ ; VF8UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
1247
+ ; VF8UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
1248
+ ; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
1249
+ ; VF8UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
1250
+ ; VF8UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
1251
+ ; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
1252
+ ; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
1253
+ ; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
1254
+ ; VF8UF1: [[EXIT]]:
1255
+ ; VF8UF1-NEXT: ret void
1256
+ ;
1257
+ ; VF8UF2-LABEL: define void @test_vector_tc_eq_16(
1258
+ ; VF8UF2-SAME: ptr [[A:%.*]]) {
1259
+ ; VF8UF2-NEXT: [[ENTRY:.*]]:
1260
+ ; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1261
+ ; VF8UF2: [[VECTOR_PH]]:
1262
+ ; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
1263
+ ; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
1264
+ ; VF8UF2: [[VECTOR_BODY]]:
1265
+ ; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1266
+ ; VF8UF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
1267
+ ; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8
1268
+ ; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1
1269
+ ; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
1270
+ ; VF8UF2-NEXT: [[TMP2:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
1271
+ ; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
1272
+ ; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[NEXT_GEP]], align 1
1273
+ ; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP1]], align 1
1274
+ ; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1275
+ ; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
1276
+ ; VF8UF2-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
1277
+ ; VF8UF2: [[MIDDLE_BLOCK]]:
1278
+ ; VF8UF2-NEXT: br label %[[SCALAR_PH]]
1279
+ ; VF8UF2: [[SCALAR_PH]]:
1280
+ ; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1281
+ ; VF8UF2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
1282
+ ; VF8UF2-NEXT: br label %[[LOOP:.*]]
1283
+ ; VF8UF2: [[LOOP]]:
1284
+ ; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1285
+ ; VF8UF2-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
1286
+ ; VF8UF2-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
1287
+ ; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
1288
+ ; VF8UF2-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
1289
+ ; VF8UF2-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
1290
+ ; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
1291
+ ; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
1292
+ ; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
1293
+ ; VF8UF2: [[EXIT]]:
1294
+ ; VF8UF2-NEXT: ret void
1295
+ ;
1296
+ ; VF16UF1-LABEL: define void @test_vector_tc_eq_16(
1297
+ ; VF16UF1-SAME: ptr [[A:%.*]]) {
1298
+ ; VF16UF1-NEXT: [[ENTRY:.*]]:
1299
+ ; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1300
+ ; VF16UF1: [[VECTOR_PH]]:
1301
+ ; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
1302
+ ; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
1303
+ ; VF16UF1: [[VECTOR_BODY]]:
1304
+ ; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
1305
+ ; VF16UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
1306
+ ; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
1307
+ ; VF16UF1-NEXT: [[TMP1:%.*]] = add nsw <16 x i8> [[WIDE_LOAD]], splat (i8 10)
1308
+ ; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1
1309
+ ; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
1310
+ ; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
1311
+ ; VF16UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
1312
+ ; VF16UF1: [[MIDDLE_BLOCK]]:
1313
+ ; VF16UF1-NEXT: br label %[[SCALAR_PH]]
1314
+ ; VF16UF1: [[SCALAR_PH]]:
1315
+ ; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
1316
+ ; VF16UF1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
1317
+ ; VF16UF1-NEXT: br label %[[LOOP:.*]]
1318
+ ; VF16UF1: [[LOOP]]:
1319
+ ; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
1320
+ ; VF16UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
1321
+ ; VF16UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
1322
+ ; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
1323
+ ; VF16UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
1324
+ ; VF16UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
1325
+ ; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
1326
+ ; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
1327
+ ; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
1328
+ ; VF16UF1: [[EXIT]]:
1329
+ ; VF16UF1-NEXT: ret void
1330
+ ;
1331
; Input IR under test starts here. Two inductions advance in lockstep: the
; integer counter %iv (0, 1, 2, ...) and the byte pointer %p.src (starting at
; %A). The memory access goes through the pointer induction, not through a
; GEP indexed by %iv; only the exit compare uses the integer counter.
+ entry:
1332
+ br label %loop
1333
+
1334
+ loop:
1335
+ %iv = phi i64 [ 0 , %entry ], [ %iv.next , %loop ]
1336
+ %p.src = phi ptr [ %A , %entry ], [ %p.src.next , %loop ]
1337
+ %p.src.next = getelementptr inbounds i8 , ptr %p.src , i64 1
1338
+ %l = load i8 , ptr %p.src , align 1
1339
+ %add = add nsw i8 %l , 10
1340
+ store i8 %add , ptr %p.src
1341
+ %iv.next = add nsw i64 %iv , 1
1342
+ %cmp = icmp eq i64 %iv.next , 17
1343
+ br i1 %cmp , label %exit , label %loop
1344
+
1345
+ exit:
1346
+ ret void
1347
+ }
1221
1348
;.
1222
1349
; VF8UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1223
1350
; VF8UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -1227,18 +1354,24 @@ exit:
1227
1354
; VF8UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
1228
1355
; VF8UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
1229
1356
; VF8UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
1357
+ ; VF8UF1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
1358
+ ; VF8UF1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
1230
1359
;.
1231
1360
; VF8UF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1232
1361
; VF8UF2: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
1233
1362
; VF8UF2: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
1234
1363
; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
1235
1364
; VF8UF2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1236
1365
; VF8UF2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
1366
+ ; VF8UF2: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
1367
+ ; VF8UF2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
1237
1368
;.
1238
1369
; VF16UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
1239
1370
; VF16UF1: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
1240
1371
; VF16UF1: [[META2]] = !{!"llvm.loop.isvectorized", i32 1}
1241
1372
; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
1242
1373
; VF16UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
1243
1374
; VF16UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
1375
+ ; VF16UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
1376
+ ; VF16UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
1244
1377
;.
0 commit comments