Skip to content

Commit 7f5c069

Browse files
committed
[AArch64] Use SVE2 bit-sel instructions for some binary patterns.
We can use NBSL/BSL2N to implement the following operations via the corresponding identities:
* EON(a, b) = BSL2N(a, a, b)
* NAND(a, b) = NBSL(a, b, b) = NBSL(b, a, a)
* NOR(a, b) = NBSL(a, b, a) = NBSL(b, a, b)
* ORN(a, b) = BSL2N(a, b, a)

These operations are currently lowered into at least two instructions because we don't have dedicated Neon/SVE instructions for them. With the appropriate pattern of NBSL/BSL2N we can lower them in a single instruction.

P.S. We can also use NBSL to implement an unpredicated NOT(a) = NBSL(a, a, a). However, because of the tied register constraint, this may not always be profitable.
1 parent 46785cb commit 7f5c069

File tree

5 files changed

+57
-30
lines changed

5 files changed

+57
-30
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4034,6 +4034,36 @@ let Predicates = [HasSVE2_or_SME] in {
40344034
defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", AArch64bsl2n>;
40354035
defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", AArch64nbsl>;
40364036

4037+
// Emits the selection patterns that rewrite the two-operand fragment InOp
// into the output fragment OutOp. Three anonymous patterns are produced:
//  * one on the scalable vector type VT itself, and
//  * one each for V64 and V128 operands, where both operands are placed in
//    the dsub / zsub subregister of an IMPLICIT_DEF value, OutOp is applied,
//    and the result is read back from the same subregister.
multiclass binary_bitwise<ValueType VT, SDPatternOperator InOp, SDPatternOperator OutOp> {
4038+
// Operands already of type VT: forward both operands straight to OutOp.
def : Pat<(InOp VT:$op1, VT:$op2), (OutOp $op1, $op2)>;
4039+
4040+
// V64 operands: wrap each in dsub of an undefined wider value, then extract
// dsub from the OutOp result.
def : Pat<(SVEType<VT>.DSub (InOp V64:$op1, V64:$op2)),
4041+
(EXTRACT_SUBREG (OutOp (INSERT_SUBREG (IMPLICIT_DEF), (SVEType<VT>.DSub $op1), dsub),
4042+
(INSERT_SUBREG (IMPLICIT_DEF), (SVEType<VT>.DSub $op2), dsub)), dsub)>;
4043+
4044+
// V128 operands: same scheme as above, using the zsub subregister.
def : Pat<(SVEType<VT>.ZSub (InOp V128:$op1, V128:$op2)),
4045+
(EXTRACT_SUBREG (OutOp (INSERT_SUBREG (IMPLICIT_DEF), (SVEType<VT>.ZSub $op1), zsub),
4046+
(INSERT_SUBREG (IMPLICIT_DEF), (SVEType<VT>.ZSub $op2), zsub)), zsub)>;
4047+
}
4048+
4049+
// Instantiate binary_bitwise for every listed scalable integer vector type so
// that the EON, NAND, NOR and ORN fragments below are each selected to a
// single BSL2N_ZZZZ or NBSL_ZZZZ, using the identities in the comments.
foreach VT = [nxv16i8, nxv8i16, nxv4i32, nxv2i64] in {
4050+
// EON (a, b) = BSL2N (a, a, b) = BSL2N (b, b, a)
4051+
defm : binary_bitwise<VT, PatFrag<(ops node:$op1, node:$op2), (vnot (xor node:$op1, node:$op2))>,
4052+
OutPatFrag<(ops node:$op1, node:$op2), (BSL2N_ZZZZ $op1, $op1, $op2)>>;
4053+
4054+
// NAND (a, b) = NBSL (a, b, b) = NBSL (b, a, a)
// The output below passes ($op2, $op1, $op1), i.e. the second form.
4055+
defm : binary_bitwise<VT, PatFrag<(ops node:$op1, node:$op2), (vnot (and node:$op1, node:$op2))>,
4056+
OutPatFrag<(ops node:$op1, node:$op2), (NBSL_ZZZZ $op2, $op1, $op1)>>;
4057+
4058+
// NOR (a, b) = NBSL (a, b, a) = NBSL (b, a, b)
// The output below passes ($op2, $op1, $op2), i.e. the second form.
4059+
defm : binary_bitwise<VT, PatFrag<(ops node:$op1, node:$op2), (vnot (or node:$op1, node:$op2))>,
4060+
OutPatFrag<(ops node:$op1, node:$op2), (NBSL_ZZZZ $op2, $op1, $op2)>>;
4061+
4062+
// ORN (a, b) = BSL2N (a, b, a)
4063+
defm : binary_bitwise<VT, PatFrag<(ops node:$op1, node:$op2), (or node:$op1, (vnot node:$op2))>,
4064+
OutPatFrag<(ops node:$op1, node:$op2), (BSL2N_ZZZZ $op1, $op2, $op1)>>;
4065+
}
4066+
40374067
// SVE2 bitwise xor and rotate right by immediate
40384068
defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", int_aarch64_sve_xar>;
40394069

llvm/test/CodeGen/AArch64/bsl.ll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -457,8 +457,10 @@ define <2 x i64> @nand_q(<2 x i64> %0, <2 x i64> %1) #0 {
457457
;
458458
; SVE2-LABEL: nand_q:
459459
; SVE2: // %bb.0:
460-
; SVE2-NEXT: and v0.16b, v1.16b, v0.16b
461-
; SVE2-NEXT: mvn v0.16b, v0.16b
460+
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
461+
; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
462+
; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z1.d
463+
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
462464
; SVE2-NEXT: ret
463465
%3 = and <2 x i64> %1, %0
464466
%4 = xor <2 x i64> %3, splat (i64 -1)
@@ -475,8 +477,10 @@ define <2 x i64> @nor_q(<2 x i64> %0, <2 x i64> %1) #0 {
475477
;
476478
; SVE2-LABEL: nor_q:
477479
; SVE2: // %bb.0:
478-
; SVE2-NEXT: orr v0.16b, v1.16b, v0.16b
479-
; SVE2-NEXT: mvn v0.16b, v0.16b
480+
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
481+
; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
482+
; SVE2-NEXT: nbsl z0.d, z0.d, z1.d, z0.d
483+
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
480484
; SVE2-NEXT: ret
481485
%3 = or <2 x i64> %1, %0
482486
%4 = xor <2 x i64> %3, splat (i64 -1)
@@ -493,8 +497,10 @@ define <2 x i64> @eon_q(<2 x i64> %0, <2 x i64> %1) #0 {
493497
;
494498
; SVE2-LABEL: eon_q:
495499
; SVE2: // %bb.0:
496-
; SVE2-NEXT: eor v0.16b, v0.16b, v1.16b
497-
; SVE2-NEXT: mvn v0.16b, v0.16b
500+
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
501+
; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
502+
; SVE2-NEXT: bsl2n z0.d, z0.d, z0.d, z1.d
503+
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
498504
; SVE2-NEXT: ret
499505
%3 = xor <2 x i64> %0, %1
500506
%4 = xor <2 x i64> %3, splat (i64 -1)

llvm/test/CodeGen/AArch64/eor3.ll

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -259,14 +259,18 @@ define <2 x i64> @eor3_vnot(<2 x i64> %0, <2 x i64> %1) {
259259
;
260260
; SVE2-LABEL: eor3_vnot:
261261
; SVE2: // %bb.0:
262-
; SVE2-NEXT: eor v0.16b, v0.16b, v1.16b
263-
; SVE2-NEXT: mvn v0.16b, v0.16b
262+
; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
263+
; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
264+
; SVE2-NEXT: bsl2n z0.d, z0.d, z0.d, z1.d
265+
; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
264266
; SVE2-NEXT: ret
265267
;
266268
; SHA3-SVE2-LABEL: eor3_vnot:
267269
; SHA3-SVE2: // %bb.0:
268-
; SHA3-SVE2-NEXT: eor v0.16b, v0.16b, v1.16b
269-
; SHA3-SVE2-NEXT: mvn v0.16b, v0.16b
270+
; SHA3-SVE2-NEXT: // kill: def $q0 killed $q0 def $z0
271+
; SHA3-SVE2-NEXT: // kill: def $q1 killed $q1 def $z1
272+
; SHA3-SVE2-NEXT: bsl2n z0.d, z0.d, z0.d, z1.d
273+
; SHA3-SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0
270274
; SHA3-SVE2-NEXT: ret
271275
%3 = xor <2 x i64> %0, <i64 -1, i64 -1>
272276
%4 = xor <2 x i64> %3, %1

llvm/test/CodeGen/AArch64/sve-pred-selectop.ll

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -322,11 +322,9 @@ entry:
322322
define <vscale x 4 x i32> @ornot_v4i32(<vscale x 4 x i32> %z, <vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
323323
; CHECK-LABEL: ornot_v4i32:
324324
; CHECK: // %bb.0: // %entry
325-
; CHECK-NEXT: mov z3.s, #-1 // =0xffffffffffffffff
326325
; CHECK-NEXT: ptrue p0.s
326+
; CHECK-NEXT: bsl2n z1.d, z1.d, z2.d, z1.d
327327
; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
328-
; CHECK-NEXT: eor z2.d, z2.d, z3.d
329-
; CHECK-NEXT: orr z1.d, z1.d, z2.d
330328
; CHECK-NEXT: mov z0.s, p0/m, z1.s
331329
; CHECK-NEXT: ret
332330
entry:
@@ -340,11 +338,9 @@ entry:
340338
define <vscale x 8 x i16> @ornot_v8i16(<vscale x 8 x i16> %z, <vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
341339
; CHECK-LABEL: ornot_v8i16:
342340
; CHECK: // %bb.0: // %entry
343-
; CHECK-NEXT: mov z3.h, #-1 // =0xffffffffffffffff
344341
; CHECK-NEXT: ptrue p0.h
342+
; CHECK-NEXT: bsl2n z1.d, z1.d, z2.d, z1.d
345343
; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
346-
; CHECK-NEXT: eor z2.d, z2.d, z3.d
347-
; CHECK-NEXT: orr z1.d, z1.d, z2.d
348344
; CHECK-NEXT: mov z0.h, p0/m, z1.h
349345
; CHECK-NEXT: ret
350346
entry:
@@ -358,11 +354,9 @@ entry:
358354
define <vscale x 16 x i8> @ornot_v16i8(<vscale x 16 x i8> %z, <vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
359355
; CHECK-LABEL: ornot_v16i8:
360356
; CHECK: // %bb.0: // %entry
361-
; CHECK-NEXT: mov z3.b, #-1 // =0xffffffffffffffff
362357
; CHECK-NEXT: ptrue p0.b
358+
; CHECK-NEXT: bsl2n z1.d, z1.d, z2.d, z1.d
363359
; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
364-
; CHECK-NEXT: eor z2.d, z2.d, z3.d
365-
; CHECK-NEXT: orr z1.d, z1.d, z2.d
366360
; CHECK-NEXT: mov z0.b, p0/m, z1.b
367361
; CHECK-NEXT: ret
368362
entry:

llvm/test/CodeGen/AArch64/sve2-bsl.ll

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -330,9 +330,7 @@ define <vscale x 2 x i64> @not(<vscale x 2 x i64> %0) #0 {
330330
define <vscale x 2 x i64> @nand(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
331331
; CHECK-LABEL: nand:
332332
; CHECK: // %bb.0:
333-
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
334-
; CHECK-NEXT: and z0.d, z1.d, z0.d
335-
; CHECK-NEXT: eor z0.d, z0.d, z2.d
333+
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z1.d
336334
; CHECK-NEXT: ret
337335
%3 = and <vscale x 2 x i64> %1, %0
338336
%4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
@@ -343,9 +341,7 @@ define <vscale x 2 x i64> @nand(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0
343341
define <vscale x 2 x i64> @nor(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
344342
; CHECK-LABEL: nor:
345343
; CHECK: // %bb.0:
346-
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
347-
; CHECK-NEXT: orr z0.d, z1.d, z0.d
348-
; CHECK-NEXT: eor z0.d, z0.d, z2.d
344+
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z0.d
349345
; CHECK-NEXT: ret
350346
%3 = or <vscale x 2 x i64> %1, %0
351347
%4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
@@ -356,8 +352,7 @@ define <vscale x 2 x i64> @nor(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0
356352
define <vscale x 2 x i64> @eon(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
357353
; CHECK-LABEL: eon:
358354
; CHECK: // %bb.0:
359-
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
360-
; CHECK-NEXT: eor3 z0.d, z0.d, z1.d, z2.d
355+
; CHECK-NEXT: bsl2n z0.d, z0.d, z0.d, z1.d
361356
; CHECK-NEXT: ret
362357
%3 = xor <vscale x 2 x i64> %0, %1
363358
%4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
@@ -368,9 +363,7 @@ define <vscale x 2 x i64> @eon(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0
368363
define <vscale x 2 x i64> @orn(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
369364
; CHECK-LABEL: orn:
370365
; CHECK: // %bb.0:
371-
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
372-
; CHECK-NEXT: eor z1.d, z1.d, z2.d
373-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
366+
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z0.d
374367
; CHECK-NEXT: ret
375368
%3 = xor <vscale x 2 x i64> %1, splat (i64 -1)
376369
%4 = or <vscale x 2 x i64> %0, %3

0 commit comments

Comments
 (0)