@@ -1255,3 +1255,151 @@ entry:
1255
1255
%partial.reduce = tail call <2 x i64 > @llvm.vector.partial.reduce.add (<2 x i64 > %acc , <8 x i64 > %input.wide )
1256
1256
ret <2 x i64 > %partial.reduce
1257
1257
}
1258
+
1259
+ define <4 x i32 > @partial_reduce_shl_sext_const_rhs6 (<16 x i8 > %l , <4 x i32 > %part ) { ; sext <16 x i8> to i32 lanes, shl by constant 6, then partial-reduce add into the <4 x i32> accumulator %part
1260
+ ; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs6:
1261
+ ; CHECK-COMMON: // %bb.0:
1262
+ ; CHECK-COMMON-NEXT: sshll v2.8h, v0.8b, #0
1263
+ ; CHECK-COMMON-NEXT: sshll2 v0.8h, v0.16b, #0
1264
+ ; CHECK-COMMON-NEXT: sshll v3.4s, v0.4h, #6
1265
+ ; CHECK-COMMON-NEXT: sshll2 v4.4s, v2.8h, #6
1266
+ ; CHECK-COMMON-NEXT: sshll v2.4s, v2.4h, #6
1267
+ ; CHECK-COMMON-NEXT: sshll2 v0.4s, v0.8h, #6
1268
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
1269
+ ; CHECK-COMMON-NEXT: add v2.4s, v4.4s, v3.4s
1270
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
1271
+ ; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
1272
+ ; CHECK-COMMON-NEXT: ret
1273
+ %ext = sext <16 x i8 > %l to <16 x i32 > ; sign-extend all 16 byte lanes to i32
1274
+ %shift = shl nsw <16 x i32 > %ext , splat (i32 6 ) ; uniform shift by 6; the expected output above carries it as the #6 immediate of the 16->32-bit sshll/sshll2 widens
1275
+ %red = tail call <4 x i32 > @llvm.vector.partial.reduce.add.v4i32.v16i32 (<4 x i32 > %part , <16 x i32 > %shift ) ; add the 16 shifted lanes into the 4-lane accumulator
1276
+ ret <4 x i32 > %red
1277
+ }
1278
+
1279
+ define <4 x i32 > @partial_reduce_shl_sext_const_rhs8 (<16 x i8 > %l , <4 x i32 > %part ) { ; sext <16 x i8> to i32 lanes, shl by constant 8, then partial-reduce add into the <4 x i32> accumulator %part
1280
+ ; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs8:
1281
+ ; CHECK-COMMON: // %bb.0:
1282
+ ; CHECK-COMMON-NEXT: sshll v2.8h, v0.8b, #0
1283
+ ; CHECK-COMMON-NEXT: sshll2 v0.8h, v0.16b, #0
1284
+ ; CHECK-COMMON-NEXT: sshll v3.4s, v0.4h, #8
1285
+ ; CHECK-COMMON-NEXT: sshll2 v4.4s, v2.8h, #8
1286
+ ; CHECK-COMMON-NEXT: sshll v2.4s, v2.4h, #8
1287
+ ; CHECK-COMMON-NEXT: sshll2 v0.4s, v0.8h, #8
1288
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
1289
+ ; CHECK-COMMON-NEXT: add v2.4s, v4.4s, v3.4s
1290
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
1291
+ ; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
1292
+ ; CHECK-COMMON-NEXT: ret
1293
+ %ext = sext <16 x i8 > %l to <16 x i32 > ; sign-extend all 16 byte lanes to i32
1294
+ %shift = shl nsw <16 x i32 > %ext , splat (i32 8 ) ; uniform shift by 8 (the source element width); the expected output above carries it as the #8 immediate of the 16->32-bit sshll/sshll2 widens
1295
+ %red = tail call <4 x i32 > @llvm.vector.partial.reduce.add.v4i32.v16i32 (<4 x i32 > %part , <16 x i32 > %shift ) ; add the 16 shifted lanes into the 4-lane accumulator
1296
+ ret <4 x i32 > %red
1297
+ }
1298
+
1299
+ define <4 x i32 > @partial_reduce_shl_sext_const_rhs_9 (<16 x i8 > %l , <4 x i32 > %part ) { ; sext <16 x i8> to i32 lanes, shl by constant 9, then partial-reduce add into the <4 x i32> accumulator %part
1300
+ ; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs_9:
1301
+ ; CHECK-COMMON: // %bb.0:
1302
+ ; CHECK-COMMON-NEXT: sshll v2.8h, v0.8b, #0
+ ; CHECK-COMMON-NEXT: sshll2 v0.8h, v0.16b, #0
+ ; CHECK-COMMON-NEXT: sshll v3.4s, v0.4h, #9
+ ; CHECK-COMMON-NEXT: sshll2 v4.4s, v2.8h, #9
+ ; CHECK-COMMON-NEXT: sshll v2.4s, v2.4h, #9
+ ; CHECK-COMMON-NEXT: sshll2 v0.4s, v0.8h, #9
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
+ ; CHECK-COMMON-NEXT: add v2.4s, v4.4s, v3.4s
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
+ ; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
+ ; CHECK-COMMON-NEXT: ret
1303
+ %ext = sext <16 x i8 > %l to <16 x i32 > ; sign-extend all 16 byte lanes to i32
1304
+ %shift = shl nsw <16 x i32 > %ext , splat (i32 9 ) ; shift by 9 as the test name says; a shift of 32 equals the i32 lane width, which makes the shl poison and reduces the whole function to a bare ret. NOTE(review): the expected lines above are modelled on the shift-by-8 variant - regenerate them with the file's assertion-update script to confirm
1305
+ %red = tail call <4 x i32 > @llvm.vector.partial.reduce.add.v4i32.v16i32 (<4 x i32 > %part , <16 x i32 > %shift ) ; add the 16 shifted lanes into the 4-lane accumulator
1306
+ ret <4 x i32 > %red
1307
+ }
1308
+
1309
+ define <4 x i32 > @partial_reduce_shl_sext_non_const_rhs (<16 x i8 > %l , <4 x i32 > %part ) { ; sext <16 x i8> to i32 lanes, shl by a non-constant per-lane amount, then partial-reduce add into the <4 x i32> accumulator %part
1310
+ ; CHECK-COMMON-LABEL: partial_reduce_shl_sext_non_const_rhs:
1311
+ ; CHECK-COMMON: // %bb.0:
1312
+ ; CHECK-COMMON-NEXT: sshll v2.8h, v0.8b, #0
1313
+ ; CHECK-COMMON-NEXT: sshll2 v0.8h, v0.16b, #0
1314
+ ; CHECK-COMMON-NEXT: sshll v3.4s, v2.4h, #0
1315
+ ; CHECK-COMMON-NEXT: sshll2 v2.4s, v2.8h, #0
1316
+ ; CHECK-COMMON-NEXT: sshll v4.4s, v0.4h, #0
1317
+ ; CHECK-COMMON-NEXT: sshll2 v0.4s, v0.8h, #0
1318
+ ; CHECK-COMMON-NEXT: ushl v4.4s, v4.4s, v4.4s
1319
+ ; CHECK-COMMON-NEXT: ushl v2.4s, v2.4s, v2.4s
1320
+ ; CHECK-COMMON-NEXT: ushl v3.4s, v3.4s, v3.4s
1321
+ ; CHECK-COMMON-NEXT: ushl v0.4s, v0.4s, v0.4s
1322
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v3.4s
1323
+ ; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v4.4s
1324
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
1325
+ ; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
1326
+ ; CHECK-COMMON-NEXT: ret
1327
+ %ext = sext <16 x i8 > %l to <16 x i32 > ; sign-extend all 16 byte lanes to i32
1328
+ %shift = shl nsw <16 x i32 > %ext , %ext ; each lane is shifted by its own extended value; the expected output above widens with #0 and then applies register-form ushl with the same register as both source operands
1329
+ %red = tail call <4 x i32 > @llvm.vector.partial.reduce.add.v4i32.v16i32 (<4 x i32 > %part , <16 x i32 > %shift ) ; add the 16 shifted lanes into the 4-lane accumulator
1330
+ ret <4 x i32 > %red
1331
+ }
1332
+
1333
+ define <4 x i32 > @partial_reduce_shl_zext_const_rhs6 (<16 x i8 > %l , <4 x i32 > %part ) { ; zext <16 x i8> to i32 lanes, shl by constant 6, then partial-reduce add into the <4 x i32> accumulator %part
1334
+ ; CHECK-COMMON-LABEL: partial_reduce_shl_zext_const_rhs6:
1335
+ ; CHECK-COMMON: // %bb.0:
1336
+ ; CHECK-COMMON-NEXT: ushll v2.8h, v0.8b, #0
1337
+ ; CHECK-COMMON-NEXT: ushll2 v0.8h, v0.16b, #0
1338
+ ; CHECK-COMMON-NEXT: ushll v3.4s, v0.4h, #6
1339
+ ; CHECK-COMMON-NEXT: ushll2 v4.4s, v2.8h, #6
1340
+ ; CHECK-COMMON-NEXT: ushll v2.4s, v2.4h, #6
1341
+ ; CHECK-COMMON-NEXT: ushll2 v0.4s, v0.8h, #6
1342
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
1343
+ ; CHECK-COMMON-NEXT: add v2.4s, v4.4s, v3.4s
1344
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
1345
+ ; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
1346
+ ; CHECK-COMMON-NEXT: ret
1347
+ %ext = zext <16 x i8 > %l to <16 x i32 > ; zero-extend all 16 byte lanes to i32
1348
+ %shift = shl nsw <16 x i32 > %ext , splat (i32 6 ) ; uniform shift by 6; the expected output above carries it as the #6 immediate of the 16->32-bit ushll/ushll2 widens
1349
+ %red = tail call <4 x i32 > @llvm.vector.partial.reduce.add.v4i32.v16i32 (<4 x i32 > %part , <16 x i32 > %shift ) ; add the 16 shifted lanes into the 4-lane accumulator
1350
+ ret <4 x i32 > %red
1351
+ }
1352
+
1353
+ define <4 x i32 > @partial_reduce_shl_zext_const_rhs8 (<16 x i8 > %l , <4 x i32 > %part ) { ; zext <16 x i8> to i32 lanes, shl by constant 8, then partial-reduce add into the <4 x i32> accumulator %part
1354
+ ; CHECK-COMMON-LABEL: partial_reduce_shl_zext_const_rhs8:
1355
+ ; CHECK-COMMON: // %bb.0:
1356
+ ; CHECK-COMMON-NEXT: ushll v2.8h, v0.8b, #0
1357
+ ; CHECK-COMMON-NEXT: ushll2 v0.8h, v0.16b, #0
1358
+ ; CHECK-COMMON-NEXT: ushll v3.4s, v0.4h, #8
1359
+ ; CHECK-COMMON-NEXT: ushll2 v4.4s, v2.8h, #8
1360
+ ; CHECK-COMMON-NEXT: ushll v2.4s, v2.4h, #8
1361
+ ; CHECK-COMMON-NEXT: ushll2 v0.4s, v0.8h, #8
1362
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
1363
+ ; CHECK-COMMON-NEXT: add v2.4s, v4.4s, v3.4s
1364
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
1365
+ ; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
1366
+ ; CHECK-COMMON-NEXT: ret
1367
+ %ext = zext <16 x i8 > %l to <16 x i32 > ; zero-extend all 16 byte lanes to i32
1368
+ %shift = shl nsw <16 x i32 > %ext , splat (i32 8 ) ; uniform shift by 8 (the source element width); the expected output above carries it as the #8 immediate of the 16->32-bit ushll/ushll2 widens
1369
+ %red = tail call <4 x i32 > @llvm.vector.partial.reduce.add.v4i32.v16i32 (<4 x i32 > %part , <16 x i32 > %shift ) ; add the 16 shifted lanes into the 4-lane accumulator
1370
+ ret <4 x i32 > %red
1371
+ }
1372
+
1373
+ define <4 x i32 > @partial_reduce_shl_zext_const_rhs_9 (<16 x i8 > %l , <4 x i32 > %part ) { ; zext <16 x i8> to i32 lanes, shl by constant 9, then partial-reduce add into the <4 x i32> accumulator %part
1374
+ ; CHECK-COMMON-LABEL: partial_reduce_shl_zext_const_rhs_9:
1375
+ ; CHECK-COMMON: // %bb.0:
1376
+ ; CHECK-COMMON-NEXT: ushll v2.8h, v0.8b, #0
+ ; CHECK-COMMON-NEXT: ushll2 v0.8h, v0.16b, #0
+ ; CHECK-COMMON-NEXT: ushll v3.4s, v0.4h, #9
+ ; CHECK-COMMON-NEXT: ushll2 v4.4s, v2.8h, #9
+ ; CHECK-COMMON-NEXT: ushll v2.4s, v2.4h, #9
+ ; CHECK-COMMON-NEXT: ushll2 v0.4s, v0.8h, #9
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
+ ; CHECK-COMMON-NEXT: add v2.4s, v4.4s, v3.4s
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
+ ; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
+ ; CHECK-COMMON-NEXT: ret
1377
+ %ext = zext <16 x i8 > %l to <16 x i32 > ; zero-extend all 16 byte lanes to i32
1378
+ %shift = shl nsw <16 x i32 > %ext , splat (i32 9 ) ; shift by 9 as the test name says; a shift of 32 equals the i32 lane width, which makes the shl poison and reduces the whole function to a bare ret. NOTE(review): the expected lines above are modelled on the shift-by-8 variant - regenerate them with the file's assertion-update script to confirm
1379
+ %red = tail call <4 x i32 > @llvm.vector.partial.reduce.add.v4i32.v16i32 (<4 x i32 > %part , <16 x i32 > %shift ) ; add the 16 shifted lanes into the 4-lane accumulator
1380
+ ret <4 x i32 > %red
1381
+ }
1382
+
1383
+ define <4 x i32 > @partial_reduce_shl_zext_non_const_rhs (<16 x i8 > %l , <4 x i32 > %part ) { ; zext <16 x i8> to i32 lanes, shl by a non-constant per-lane amount, then partial-reduce add into the <4 x i32> accumulator %part
1384
+ ; CHECK-COMMON-LABEL: partial_reduce_shl_zext_non_const_rhs:
1385
+ ; CHECK-COMMON: // %bb.0:
1386
+ ; CHECK-COMMON-NEXT: ushll v2.8h, v0.8b, #0
1387
+ ; CHECK-COMMON-NEXT: ushll2 v0.8h, v0.16b, #0
1388
+ ; CHECK-COMMON-NEXT: ushll v3.4s, v2.4h, #0
1389
+ ; CHECK-COMMON-NEXT: ushll2 v2.4s, v2.8h, #0
1390
+ ; CHECK-COMMON-NEXT: ushll v4.4s, v0.4h, #0
1391
+ ; CHECK-COMMON-NEXT: ushll2 v0.4s, v0.8h, #0
1392
+ ; CHECK-COMMON-NEXT: ushl v4.4s, v4.4s, v4.4s
1393
+ ; CHECK-COMMON-NEXT: ushl v2.4s, v2.4s, v2.4s
1394
+ ; CHECK-COMMON-NEXT: ushl v3.4s, v3.4s, v3.4s
1395
+ ; CHECK-COMMON-NEXT: ushl v0.4s, v0.4s, v0.4s
1396
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v3.4s
1397
+ ; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v4.4s
1398
+ ; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
1399
+ ; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
1400
+ ; CHECK-COMMON-NEXT: ret
1401
+ %ext = zext <16 x i8 > %l to <16 x i32 > ; zero-extend all 16 byte lanes to i32
1402
+ %shift = shl nsw <16 x i32 > %ext , %ext ; each lane is shifted by its own extended value; the expected output above widens with #0 and then applies register-form ushl with the same register as both source operands
1403
+ %red = tail call <4 x i32 > @llvm.vector.partial.reduce.add.v4i32.v16i32 (<4 x i32 > %part , <16 x i32 > %shift ) ; add the 16 shifted lanes into the 4-lane accumulator
1404
+ ret <4 x i32 > %red
1405
+ }
0 commit comments