@@ -1255,3 +1255,151 @@ entry:
12551255 %partial.reduce = tail call <2 x i64 > @llvm.vector.partial.reduce.add (<2 x i64 > %acc , <8 x i64 > %input.wide )
12561256 ret <2 x i64 > %partial.reduce
12571257}
1258+
; Sign-extends the sixteen i8 lanes of %l to i32, shifts every lane left by the
; splat constant 6, and accumulates the result into the <4 x i32> value %part
; through the partial-reduce-add intrinsic.
define <4 x i32> @partial_reduce_shl_sext_const_rhs6(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs6:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: sshll v2.8h, v0.8b, #0
; CHECK-COMMON-NEXT: sshll2 v0.8h, v0.16b, #0
; CHECK-COMMON-NEXT: sshll v3.4s, v0.4h, #6
; CHECK-COMMON-NEXT: sshll2 v4.4s, v2.8h, #6
; CHECK-COMMON-NEXT: sshll v2.4s, v2.4h, #6
; CHECK-COMMON-NEXT: sshll2 v0.4s, v0.8h, #6
; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT: add v2.4s, v4.4s, v3.4s
; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-COMMON-NEXT: ret
  %wide = sext <16 x i8> %l to <16 x i32>
  %scaled = shl nsw <16 x i32> %wide, splat (i32 6)
  %sum = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %scaled)
  ret <4 x i32> %sum
}
1278+
; Sign-extends the sixteen i8 lanes of %l to i32, shifts every lane left by the
; splat constant 8 (the bit width of the source element type), and accumulates
; the result into the <4 x i32> value %part through the partial-reduce-add
; intrinsic.
define <4 x i32> @partial_reduce_shl_sext_const_rhs8(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs8:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: sshll v2.8h, v0.8b, #0
; CHECK-COMMON-NEXT: sshll2 v0.8h, v0.16b, #0
; CHECK-COMMON-NEXT: sshll v3.4s, v0.4h, #8
; CHECK-COMMON-NEXT: sshll2 v4.4s, v2.8h, #8
; CHECK-COMMON-NEXT: sshll v2.4s, v2.4h, #8
; CHECK-COMMON-NEXT: sshll2 v0.4s, v0.8h, #8
; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT: add v2.4s, v4.4s, v3.4s
; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-COMMON-NEXT: ret
  %wide = sext <16 x i8> %l to <16 x i32>
  %scaled = shl nsw <16 x i32> %wide, splat (i32 8)
  %sum = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %scaled)
  ret <4 x i32> %sum
}
1298+
; Sign-extends the sixteen i8 lanes of %l to i32, shifts every lane left by the
; splat constant 32, and feeds the result to the partial-reduce-add intrinsic
; together with %part.
;
; The shift amount equals the i32 lane width. The LLVM language reference
; defines a shl by an amount >= the bit width as poison, which is consistent
; with the expected assembly being a bare return.
; NOTE(review): the `_9` suffix does not match the shift amount of 32 used in
; the body - confirm which of the two was intended.
define <4 x i32> @partial_reduce_shl_sext_const_rhs_9(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs_9:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: ret
  %wide = sext <16 x i8> %l to <16 x i32>
  %scaled = shl nsw <16 x i32> %wide, splat (i32 32)
  %sum = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %scaled)
  ret <4 x i32> %sum
}
1308+
; Sign-extends the sixteen i8 lanes of %l to i32 and shifts each lane left by
; itself, so the shift amount is a per-lane variable rather than a constant.
; The shifted vector is accumulated into the <4 x i32> value %part through the
; partial-reduce-add intrinsic. The expected assembly uses register-operand
; ushl instructions for the shift.
define <4 x i32> @partial_reduce_shl_sext_non_const_rhs(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_sext_non_const_rhs:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: sshll v2.8h, v0.8b, #0
; CHECK-COMMON-NEXT: sshll2 v0.8h, v0.16b, #0
; CHECK-COMMON-NEXT: sshll v3.4s, v2.4h, #0
; CHECK-COMMON-NEXT: sshll2 v2.4s, v2.8h, #0
; CHECK-COMMON-NEXT: sshll v4.4s, v0.4h, #0
; CHECK-COMMON-NEXT: sshll2 v0.4s, v0.8h, #0
; CHECK-COMMON-NEXT: ushl v4.4s, v4.4s, v4.4s
; CHECK-COMMON-NEXT: ushl v2.4s, v2.4s, v2.4s
; CHECK-COMMON-NEXT: ushl v3.4s, v3.4s, v3.4s
; CHECK-COMMON-NEXT: ushl v0.4s, v0.4s, v0.4s
; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v3.4s
; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v4.4s
; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-COMMON-NEXT: ret
  %wide = sext <16 x i8> %l to <16 x i32>
  %scaled = shl nsw <16 x i32> %wide, %wide
  %sum = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %scaled)
  ret <4 x i32> %sum
}
1332+
; Zero-extends the sixteen i8 lanes of %l to i32, shifts every lane left by the
; splat constant 6, and accumulates the result into the <4 x i32> value %part
; through the partial-reduce-add intrinsic.
define <4 x i32> @partial_reduce_shl_zext_const_rhs6(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_zext_const_rhs6:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: ushll v2.8h, v0.8b, #0
; CHECK-COMMON-NEXT: ushll2 v0.8h, v0.16b, #0
; CHECK-COMMON-NEXT: ushll v3.4s, v0.4h, #6
; CHECK-COMMON-NEXT: ushll2 v4.4s, v2.8h, #6
; CHECK-COMMON-NEXT: ushll v2.4s, v2.4h, #6
; CHECK-COMMON-NEXT: ushll2 v0.4s, v0.8h, #6
; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT: add v2.4s, v4.4s, v3.4s
; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-COMMON-NEXT: ret
  %wide = zext <16 x i8> %l to <16 x i32>
  %scaled = shl nsw <16 x i32> %wide, splat (i32 6)
  %sum = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %scaled)
  ret <4 x i32> %sum
}
1352+
; Zero-extends the sixteen i8 lanes of %l to i32, shifts every lane left by the
; splat constant 8 (the bit width of the source element type), and accumulates
; the result into the <4 x i32> value %part through the partial-reduce-add
; intrinsic.
define <4 x i32> @partial_reduce_shl_zext_const_rhs8(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_zext_const_rhs8:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: ushll v2.8h, v0.8b, #0
; CHECK-COMMON-NEXT: ushll2 v0.8h, v0.16b, #0
; CHECK-COMMON-NEXT: ushll v3.4s, v0.4h, #8
; CHECK-COMMON-NEXT: ushll2 v4.4s, v2.8h, #8
; CHECK-COMMON-NEXT: ushll v2.4s, v2.4h, #8
; CHECK-COMMON-NEXT: ushll2 v0.4s, v0.8h, #8
; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT: add v2.4s, v4.4s, v3.4s
; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-COMMON-NEXT: ret
  %wide = zext <16 x i8> %l to <16 x i32>
  %scaled = shl nsw <16 x i32> %wide, splat (i32 8)
  %sum = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %scaled)
  ret <4 x i32> %sum
}
1372+
; Zero-extends the sixteen i8 lanes of %l to i32, shifts every lane left by the
; splat constant 32, and feeds the result to the partial-reduce-add intrinsic
; together with %part.
;
; The shift amount equals the i32 lane width. The LLVM language reference
; defines a shl by an amount >= the bit width as poison, which is consistent
; with the expected assembly being a bare return.
; NOTE(review): the `_9` suffix does not match the shift amount of 32 used in
; the body - confirm which of the two was intended.
define <4 x i32> @partial_reduce_shl_zext_const_rhs_9(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_zext_const_rhs_9:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: ret
  %wide = zext <16 x i8> %l to <16 x i32>
  %scaled = shl nsw <16 x i32> %wide, splat (i32 32)
  %sum = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %scaled)
  ret <4 x i32> %sum
}
1382+
; Zero-extends the sixteen i8 lanes of %l to i32 and shifts each lane left by
; itself, so the shift amount is a per-lane variable rather than a constant.
; The shifted vector is accumulated into the <4 x i32> value %part through the
; partial-reduce-add intrinsic. The expected assembly uses register-operand
; ushl instructions for the shift.
define <4 x i32> @partial_reduce_shl_zext_non_const_rhs(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_zext_non_const_rhs:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: ushll v2.8h, v0.8b, #0
; CHECK-COMMON-NEXT: ushll2 v0.8h, v0.16b, #0
; CHECK-COMMON-NEXT: ushll v3.4s, v2.4h, #0
; CHECK-COMMON-NEXT: ushll2 v2.4s, v2.8h, #0
; CHECK-COMMON-NEXT: ushll v4.4s, v0.4h, #0
; CHECK-COMMON-NEXT: ushll2 v0.4s, v0.8h, #0
; CHECK-COMMON-NEXT: ushl v4.4s, v4.4s, v4.4s
; CHECK-COMMON-NEXT: ushl v2.4s, v2.4s, v2.4s
; CHECK-COMMON-NEXT: ushl v3.4s, v3.4s, v3.4s
; CHECK-COMMON-NEXT: ushl v0.4s, v0.4s, v0.4s
; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v3.4s
; CHECK-COMMON-NEXT: add v2.4s, v2.4s, v4.4s
; CHECK-COMMON-NEXT: add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT: add v0.4s, v1.4s, v0.4s
; CHECK-COMMON-NEXT: ret
  %wide = zext <16 x i8> %l to <16 x i32>
  %scaled = shl nsw <16 x i32> %wide, %wide
  %sum = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %scaled)
  ret <4 x i32> %sum
}
0 commit comments