Skip to content

Commit 13ed4a4

Browse files
fhahn authored and mahesh-attarde committed
[AArch64] Add tests for partial reduction of shl.
Add tests with partial reductions where the operand is a shift instead of a mul.
1 parent 9248396 commit 13ed4a4

File tree

1 file changed

+148
-0
lines changed

1 file changed

+148
-0
lines changed

llvm/test/CodeGen/AArch64/neon-partial-reduce-dot-product.ll

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1255,3 +1255,151 @@ entry:
12551255
%partial.reduce = tail call <2 x i64> @llvm.vector.partial.reduce.add(<2 x i64> %acc, <8 x i64> %input.wide)
12561256
ret <2 x i64> %partial.reduce
12571257
}
1258+
1259+
; Partial-reduction add whose wide operand is a left shift rather than a mul:
; %l is sign-extended from <16 x i8> to <16 x i32>, every lane is shifted left
; by the constant 6, and the 16 lanes are folded into the <4 x i32> accumulator
; %part.
; The expected code widens in two steps (sshll/sshll2 #0 to .8h, then
; sshll/sshll2 #6 to .4s) and sums the four quarters with plain vector adds.
define <4 x i32> @partial_reduce_shl_sext_const_rhs6(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs6:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    sshll v2.8h, v0.8b, #0
; CHECK-COMMON-NEXT:    sshll2 v0.8h, v0.16b, #0
; CHECK-COMMON-NEXT:    sshll v3.4s, v0.4h, #6
; CHECK-COMMON-NEXT:    sshll2 v4.4s, v2.8h, #6
; CHECK-COMMON-NEXT:    sshll v2.4s, v2.4h, #6
; CHECK-COMMON-NEXT:    sshll2 v0.4s, v0.8h, #6
; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT:    add v2.4s, v4.4s, v3.4s
; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-COMMON-NEXT:    ret
  %ext = sext <16 x i8> %l to <16 x i32>
  %shift = shl nsw <16 x i32> %ext, splat (i32 6)
  %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift)
  ret <4 x i32> %red
}
1278+
1279+
; Partial-reduction add of a sign-extended <16 x i8> input shifted left by the
; constant 8 (the bit width of the source element type) and folded into the
; <4 x i32> accumulator %part.
; The expected code has the same shape as a smaller constant shift: widen to
; .8h with sshll/sshll2 #0, widen to .4s with the shift folded in as #8, then
; sum the four quarters with plain vector adds.
define <4 x i32> @partial_reduce_shl_sext_const_rhs8(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs8:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    sshll v2.8h, v0.8b, #0
; CHECK-COMMON-NEXT:    sshll2 v0.8h, v0.16b, #0
; CHECK-COMMON-NEXT:    sshll v3.4s, v0.4h, #8
; CHECK-COMMON-NEXT:    sshll2 v4.4s, v2.8h, #8
; CHECK-COMMON-NEXT:    sshll v2.4s, v2.4h, #8
; CHECK-COMMON-NEXT:    sshll2 v0.4s, v0.8h, #8
; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT:    add v2.4s, v4.4s, v3.4s
; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-COMMON-NEXT:    ret
  %ext = sext <16 x i8> %l to <16 x i32>
  %shift = shl nsw <16 x i32> %ext, splat (i32 8)
  %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift)
  ret <4 x i32> %red
}
1298+
1299+
; Partial-reduction add of a sign-extended <16 x i8> input shifted left by a
; constant, folded into the <4 x i32> accumulator %part.
; NOTE(review): despite the `_9` suffix, the shift amount below is 32, which
; equals the i32 lane width. LangRef defines such a shl as poison, which is
; presumably why the expected output is a bare `ret` with no arithmetic.
; If the intent was to cover a shift of 9, the constant must be changed and
; the assertions regenerated with update_llc_test_checks.py -- confirm with
; the test author.
define <4 x i32> @partial_reduce_shl_sext_const_rhs_9(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_sext_const_rhs_9:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    ret
  %ext = sext <16 x i8> %l to <16 x i32>
  %shift = shl nsw <16 x i32> %ext, splat (i32 32)
  %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift)
  ret <4 x i32> %red
}
1308+
1309+
; Partial-reduction add where the shift amount is not a constant: the
; sign-extended input %ext is used as both the shifted value and the per-lane
; shift amount, and the result is folded into the <4 x i32> accumulator %part.
; The expected code fully widens the input to four .4s vectors with
; sshll/sshll2 #0, shifts each vector by itself with the register-form ushl,
; and sums the four quarters with plain vector adds.
define <4 x i32> @partial_reduce_shl_sext_non_const_rhs(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_sext_non_const_rhs:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    sshll v2.8h, v0.8b, #0
; CHECK-COMMON-NEXT:    sshll2 v0.8h, v0.16b, #0
; CHECK-COMMON-NEXT:    sshll v3.4s, v2.4h, #0
; CHECK-COMMON-NEXT:    sshll2 v2.4s, v2.8h, #0
; CHECK-COMMON-NEXT:    sshll v4.4s, v0.4h, #0
; CHECK-COMMON-NEXT:    sshll2 v0.4s, v0.8h, #0
; CHECK-COMMON-NEXT:    ushl v4.4s, v4.4s, v4.4s
; CHECK-COMMON-NEXT:    ushl v2.4s, v2.4s, v2.4s
; CHECK-COMMON-NEXT:    ushl v3.4s, v3.4s, v3.4s
; CHECK-COMMON-NEXT:    ushl v0.4s, v0.4s, v0.4s
; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v3.4s
; CHECK-COMMON-NEXT:    add v2.4s, v2.4s, v4.4s
; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-COMMON-NEXT:    ret
  %ext = sext <16 x i8> %l to <16 x i32>
  %shift = shl nsw <16 x i32> %ext, %ext
  %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift)
  ret <4 x i32> %red
}
1332+
1333+
; Partial-reduction add whose wide operand is a left shift rather than a mul:
; %l is zero-extended from <16 x i8> to <16 x i32>, every lane is shifted left
; by the constant 6, and the 16 lanes are folded into the <4 x i32> accumulator
; %part.
; The expected code widens in two steps (ushll/ushll2 #0 to .8h, then
; ushll/ushll2 #6 to .4s) and sums the four quarters with plain vector adds.
define <4 x i32> @partial_reduce_shl_zext_const_rhs6(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_zext_const_rhs6:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    ushll v2.8h, v0.8b, #0
; CHECK-COMMON-NEXT:    ushll2 v0.8h, v0.16b, #0
; CHECK-COMMON-NEXT:    ushll v3.4s, v0.4h, #6
; CHECK-COMMON-NEXT:    ushll2 v4.4s, v2.8h, #6
; CHECK-COMMON-NEXT:    ushll v2.4s, v2.4h, #6
; CHECK-COMMON-NEXT:    ushll2 v0.4s, v0.8h, #6
; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT:    add v2.4s, v4.4s, v3.4s
; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-COMMON-NEXT:    ret
  %ext = zext <16 x i8> %l to <16 x i32>
  %shift = shl nsw <16 x i32> %ext, splat (i32 6)
  %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift)
  ret <4 x i32> %red
}
1352+
1353+
; Partial-reduction add of a zero-extended <16 x i8> input shifted left by the
; constant 8 (the bit width of the source element type) and folded into the
; <4 x i32> accumulator %part.
; The expected code has the same shape as a smaller constant shift: widen to
; .8h with ushll/ushll2 #0, widen to .4s with the shift folded in as #8, then
; sum the four quarters with plain vector adds.
define <4 x i32> @partial_reduce_shl_zext_const_rhs8(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_zext_const_rhs8:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    ushll v2.8h, v0.8b, #0
; CHECK-COMMON-NEXT:    ushll2 v0.8h, v0.16b, #0
; CHECK-COMMON-NEXT:    ushll v3.4s, v0.4h, #8
; CHECK-COMMON-NEXT:    ushll2 v4.4s, v2.8h, #8
; CHECK-COMMON-NEXT:    ushll v2.4s, v2.4h, #8
; CHECK-COMMON-NEXT:    ushll2 v0.4s, v0.8h, #8
; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT:    add v2.4s, v4.4s, v3.4s
; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-COMMON-NEXT:    ret
  %ext = zext <16 x i8> %l to <16 x i32>
  %shift = shl nsw <16 x i32> %ext, splat (i32 8)
  %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift)
  ret <4 x i32> %red
}
1372+
1373+
; Partial-reduction add of a zero-extended <16 x i8> input shifted left by a
; constant, folded into the <4 x i32> accumulator %part.
; NOTE(review): despite the `_9` suffix, the shift amount below is 32, which
; equals the i32 lane width. LangRef defines such a shl as poison, which is
; presumably why the expected output is a bare `ret` with no arithmetic.
; If the intent was to cover a shift of 9, the constant must be changed and
; the assertions regenerated with update_llc_test_checks.py -- confirm with
; the test author.
define <4 x i32> @partial_reduce_shl_zext_const_rhs_9(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_zext_const_rhs_9:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    ret
  %ext = zext <16 x i8> %l to <16 x i32>
  %shift = shl nsw <16 x i32> %ext, splat (i32 32)
  %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift)
  ret <4 x i32> %red
}
1382+
1383+
; Partial-reduction add where the shift amount is not a constant: the
; zero-extended input %ext is used as both the shifted value and the per-lane
; shift amount, and the result is folded into the <4 x i32> accumulator %part.
; The expected code fully widens the input to four .4s vectors with
; ushll/ushll2 #0, shifts each vector by itself with the register-form ushl,
; and sums the four quarters with plain vector adds.
define <4 x i32> @partial_reduce_shl_zext_non_const_rhs(<16 x i8> %l, <4 x i32> %part) {
; CHECK-COMMON-LABEL: partial_reduce_shl_zext_non_const_rhs:
; CHECK-COMMON:       // %bb.0:
; CHECK-COMMON-NEXT:    ushll v2.8h, v0.8b, #0
; CHECK-COMMON-NEXT:    ushll2 v0.8h, v0.16b, #0
; CHECK-COMMON-NEXT:    ushll v3.4s, v2.4h, #0
; CHECK-COMMON-NEXT:    ushll2 v2.4s, v2.8h, #0
; CHECK-COMMON-NEXT:    ushll v4.4s, v0.4h, #0
; CHECK-COMMON-NEXT:    ushll2 v0.4s, v0.8h, #0
; CHECK-COMMON-NEXT:    ushl v4.4s, v4.4s, v4.4s
; CHECK-COMMON-NEXT:    ushl v2.4s, v2.4s, v2.4s
; CHECK-COMMON-NEXT:    ushl v3.4s, v3.4s, v3.4s
; CHECK-COMMON-NEXT:    ushl v0.4s, v0.4s, v0.4s
; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v3.4s
; CHECK-COMMON-NEXT:    add v2.4s, v2.4s, v4.4s
; CHECK-COMMON-NEXT:    add v1.4s, v1.4s, v2.4s
; CHECK-COMMON-NEXT:    add v0.4s, v1.4s, v0.4s
; CHECK-COMMON-NEXT:    ret
  %ext = zext <16 x i8> %l to <16 x i32>
  %shift = shl nsw <16 x i32> %ext, %ext
  %red = tail call <4 x i32> @llvm.vector.partial.reduce.add.v4i32.v16i32(<4 x i32> %part, <16 x i32> %shift)
  ret <4 x i32> %red
}

0 commit comments

Comments
 (0)