Skip to content

Commit c25e7c9

Browse files
committed
[AArch64][SVE] Add patterns for bit-select instructions.
We are not selecting BSL/NBSL/BSL1N/BSL2N in some cases, e.g.:

```cpp
svuint64_t bsl(svuint64_t a, svuint64_t b, svuint64_t c) {
  return (a & c) | (b & ~c);
}
```

Currently generates:

```gas
bsl:
  and z0.d, z2.d, z0.d
  bic z1.d, z1.d, z2.d
  orr z0.d, z0.d, z1.d
  ret
```

Instead of:

```gas
bsl:
  bsl z0.d, z0.d, z1.d, z2.d
  ret
```

This patch adds patterns to match (or (and a, c), (and b, (vnot c))) to BSL, and similar derivative patterns for the other bit-select instructions.
1 parent 6f92bd5 commit c25e7c9

File tree

2 files changed

+30
-68
lines changed

2 files changed

+30
-68
lines changed

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -546,11 +546,21 @@ def AArch64umulh : PatFrag<(ops node:$op1, node:$op2),
546546

547547
def AArch64bsl : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
548548
[(int_aarch64_sve_bsl node:$Op1, node:$Op2, node:$Op3),
549-
(AArch64bsp node:$Op3, node:$Op1, node:$Op2)]>;
549+
(AArch64bsp node:$Op3, node:$Op1, node:$Op2),
550+
(or (and node:$Op1, node:$Op3), (and node:$Op2, (vnot node:$Op3)))]>;
551+
552+
def AArch64bsl1n : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
553+
[(int_aarch64_sve_bsl1n node:$Op1, node:$Op2, node:$Op3),
554+
(AArch64bsl (vnot node:$Op1), node:$Op2, node:$Op3)]>;
555+
556+
def AArch64bsl2n : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
557+
[(int_aarch64_sve_bsl2n node:$Op1, node:$Op2, node:$Op3),
558+
(or (and node:$Op1, node:$Op3), (vnot (or node:$Op2, node:$Op3)))]>;
550559

551560
def AArch64nbsl : PatFrags<(ops node:$Op1, node:$Op2, node:$Op3),
552561
[(int_aarch64_sve_nbsl node:$Op1, node:$Op2, node:$Op3),
553-
(vnot (AArch64bsp node:$Op3, node:$Op1, node:$Op2))]>;
562+
(vnot (AArch64bsp node:$Op3, node:$Op1, node:$Op2)),
563+
(vnot (AArch64bsl node:$Op1, node:$Op2, node:$Op3))]>;
554564

555565

556566
let Predicates = [HasSVE] in {
@@ -3923,8 +3933,8 @@ let Predicates = [HasSVE2_or_SME] in {
39233933
defm EOR3_ZZZZ : sve2_int_bitwise_ternary_op<0b000, "eor3", AArch64eor3>;
39243934
defm BCAX_ZZZZ : sve2_int_bitwise_ternary_op<0b010, "bcax", AArch64bcax>;
39253935
defm BSL_ZZZZ : sve2_int_bitwise_ternary_op<0b001, "bsl", AArch64bsl>;
3926-
defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", int_aarch64_sve_bsl1n>;
3927-
defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", int_aarch64_sve_bsl2n>;
3936+
defm BSL1N_ZZZZ : sve2_int_bitwise_ternary_op<0b011, "bsl1n", AArch64bsl1n>;
3937+
defm BSL2N_ZZZZ : sve2_int_bitwise_ternary_op<0b101, "bsl2n", AArch64bsl2n>;
39283938
defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", AArch64nbsl>;
39293939

39303940
// SVE2 bitwise xor and rotate right by immediate

llvm/test/CodeGen/AArch64/sve2-bsl.ll

Lines changed: 16 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,7 @@ define <vscale x 2 x i64> @nbsl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
103103
define <vscale x 16 x i8> @codegen_bsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
104104
; CHECK-LABEL: codegen_bsl_i8:
105105
; CHECK: // %bb.0:
106-
; CHECK-NEXT: and z0.d, z2.d, z0.d
107-
; CHECK-NEXT: bic z1.d, z1.d, z2.d
108-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
106+
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
109107
; CHECK-NEXT: ret
110108
%4 = and <vscale x 16 x i8> %2, %0
111109
%5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
@@ -117,11 +115,7 @@ define <vscale x 16 x i8> @codegen_bsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x
117115
define <vscale x 16 x i8> @codegen_nbsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
118116
; CHECK-LABEL: codegen_nbsl_i8:
119117
; CHECK: // %bb.0:
120-
; CHECK-NEXT: and z0.d, z2.d, z0.d
121-
; CHECK-NEXT: bic z1.d, z1.d, z2.d
122-
; CHECK-NEXT: mov z2.b, #-1 // =0xffffffffffffffff
123-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
124-
; CHECK-NEXT: eor z0.d, z0.d, z2.d
118+
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
125119
; CHECK-NEXT: ret
126120
%4 = and <vscale x 16 x i8> %2, %0
127121
%5 = xor <vscale x 16 x i8> %2, splat (i8 -1)
@@ -134,9 +128,7 @@ define <vscale x 16 x i8> @codegen_nbsl_i8(<vscale x 16 x i8> %0, <vscale x 16 x
134128
define <vscale x 16 x i8> @codegen_bsl1n_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
135129
; CHECK-LABEL: codegen_bsl1n_i8:
136130
; CHECK: // %bb.0:
137-
; CHECK-NEXT: bic z0.d, z2.d, z0.d
138-
; CHECK-NEXT: bic z1.d, z1.d, z2.d
139-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
131+
; CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
140132
; CHECK-NEXT: ret
141133
%4 = xor <vscale x 16 x i8> %0, splat (i8 -1)
142134
%5 = and <vscale x 16 x i8> %2, %4
@@ -149,11 +141,7 @@ define <vscale x 16 x i8> @codegen_bsl1n_i8(<vscale x 16 x i8> %0, <vscale x 16
149141
define <vscale x 16 x i8> @codegen_bsl2n_i8(<vscale x 16 x i8> %0, <vscale x 16 x i8> %1, <vscale x 16 x i8> %2) {
150142
; CHECK-LABEL: codegen_bsl2n_i8:
151143
; CHECK: // %bb.0:
152-
; CHECK-NEXT: orr z1.d, z2.d, z1.d
153-
; CHECK-NEXT: mov z3.b, #-1 // =0xffffffffffffffff
154-
; CHECK-NEXT: and z0.d, z2.d, z0.d
155-
; CHECK-NEXT: eor z1.d, z1.d, z3.d
156-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
144+
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
157145
; CHECK-NEXT: ret
158146
%4 = and <vscale x 16 x i8> %2, %0
159147
%5 = or <vscale x 16 x i8> %2, %1
@@ -165,9 +153,7 @@ define <vscale x 16 x i8> @codegen_bsl2n_i8(<vscale x 16 x i8> %0, <vscale x 16
165153
define <vscale x 8 x i16> @codegen_bsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
166154
; CHECK-LABEL: codegen_bsl_i16:
167155
; CHECK: // %bb.0:
168-
; CHECK-NEXT: and z0.d, z2.d, z0.d
169-
; CHECK-NEXT: bic z1.d, z1.d, z2.d
170-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
156+
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
171157
; CHECK-NEXT: ret
172158
%4 = and <vscale x 8 x i16> %2, %0
173159
%5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
@@ -179,11 +165,7 @@ define <vscale x 8 x i16> @codegen_bsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x
179165
define <vscale x 8 x i16> @codegen_nbsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
180166
; CHECK-LABEL: codegen_nbsl_i16:
181167
; CHECK: // %bb.0:
182-
; CHECK-NEXT: and z0.d, z2.d, z0.d
183-
; CHECK-NEXT: bic z1.d, z1.d, z2.d
184-
; CHECK-NEXT: mov z2.h, #-1 // =0xffffffffffffffff
185-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
186-
; CHECK-NEXT: eor z0.d, z0.d, z2.d
168+
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
187169
; CHECK-NEXT: ret
188170
%4 = and <vscale x 8 x i16> %2, %0
189171
%5 = xor <vscale x 8 x i16> %2, splat (i16 -1)
@@ -196,9 +178,7 @@ define <vscale x 8 x i16> @codegen_nbsl_i16(<vscale x 8 x i16> %0, <vscale x 8 x
196178
define <vscale x 8 x i16> @codegen_bsl1n_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
197179
; CHECK-LABEL: codegen_bsl1n_i16:
198180
; CHECK: // %bb.0:
199-
; CHECK-NEXT: bic z0.d, z2.d, z0.d
200-
; CHECK-NEXT: bic z1.d, z1.d, z2.d
201-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
181+
; CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
202182
; CHECK-NEXT: ret
203183
%4 = xor <vscale x 8 x i16> %0, splat (i16 -1)
204184
%5 = and <vscale x 8 x i16> %2, %4
@@ -211,11 +191,7 @@ define <vscale x 8 x i16> @codegen_bsl1n_i16(<vscale x 8 x i16> %0, <vscale x 8
211191
define <vscale x 8 x i16> @codegen_bsl2n_i16(<vscale x 8 x i16> %0, <vscale x 8 x i16> %1, <vscale x 8 x i16> %2) {
212192
; CHECK-LABEL: codegen_bsl2n_i16:
213193
; CHECK: // %bb.0:
214-
; CHECK-NEXT: orr z1.d, z2.d, z1.d
215-
; CHECK-NEXT: mov z3.h, #-1 // =0xffffffffffffffff
216-
; CHECK-NEXT: and z0.d, z2.d, z0.d
217-
; CHECK-NEXT: eor z1.d, z1.d, z3.d
218-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
194+
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
219195
; CHECK-NEXT: ret
220196
%4 = and <vscale x 8 x i16> %2, %0
221197
%5 = or <vscale x 8 x i16> %2, %1
@@ -227,9 +203,7 @@ define <vscale x 8 x i16> @codegen_bsl2n_i16(<vscale x 8 x i16> %0, <vscale x 8
227203
define <vscale x 4 x i32> @codegen_bsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
228204
; CHECK-LABEL: codegen_bsl_i32:
229205
; CHECK: // %bb.0:
230-
; CHECK-NEXT: and z0.d, z2.d, z0.d
231-
; CHECK-NEXT: bic z1.d, z1.d, z2.d
232-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
206+
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
233207
; CHECK-NEXT: ret
234208
%4 = and <vscale x 4 x i32> %2, %0
235209
%5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
@@ -241,11 +215,7 @@ define <vscale x 4 x i32> @codegen_bsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x
241215
define <vscale x 4 x i32> @codegen_nbsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
242216
; CHECK-LABEL: codegen_nbsl_i32:
243217
; CHECK: // %bb.0:
244-
; CHECK-NEXT: and z0.d, z2.d, z0.d
245-
; CHECK-NEXT: bic z1.d, z1.d, z2.d
246-
; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff
247-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
248-
; CHECK-NEXT: eor z0.d, z0.d, z2.d
218+
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
249219
; CHECK-NEXT: ret
250220
%4 = and <vscale x 4 x i32> %2, %0
251221
%5 = xor <vscale x 4 x i32> %2, splat (i32 -1)
@@ -258,9 +228,7 @@ define <vscale x 4 x i32> @codegen_nbsl_i32(<vscale x 4 x i32> %0, <vscale x 4 x
258228
define <vscale x 4 x i32> @codegen_bsl1n_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
259229
; CHECK-LABEL: codegen_bsl1n_i32:
260230
; CHECK: // %bb.0:
261-
; CHECK-NEXT: bic z0.d, z2.d, z0.d
262-
; CHECK-NEXT: bic z1.d, z1.d, z2.d
263-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
231+
; CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
264232
; CHECK-NEXT: ret
265233
%4 = xor <vscale x 4 x i32> %0, splat (i32 -1)
266234
%5 = and <vscale x 4 x i32> %2, %4
@@ -273,11 +241,7 @@ define <vscale x 4 x i32> @codegen_bsl1n_i32(<vscale x 4 x i32> %0, <vscale x 4
273241
define <vscale x 4 x i32> @codegen_bsl2n_i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2) {
274242
; CHECK-LABEL: codegen_bsl2n_i32:
275243
; CHECK: // %bb.0:
276-
; CHECK-NEXT: orr z1.d, z2.d, z1.d
277-
; CHECK-NEXT: mov z3.s, #-1 // =0xffffffffffffffff
278-
; CHECK-NEXT: and z0.d, z2.d, z0.d
279-
; CHECK-NEXT: eor z1.d, z1.d, z3.d
280-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
244+
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
281245
; CHECK-NEXT: ret
282246
%4 = and <vscale x 4 x i32> %2, %0
283247
%5 = or <vscale x 4 x i32> %2, %1
@@ -289,9 +253,7 @@ define <vscale x 4 x i32> @codegen_bsl2n_i32(<vscale x 4 x i32> %0, <vscale x 4
289253
define <vscale x 2 x i64> @codegen_bsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
290254
; CHECK-LABEL: codegen_bsl_i64:
291255
; CHECK: // %bb.0:
292-
; CHECK-NEXT: and z0.d, z2.d, z0.d
293-
; CHECK-NEXT: bic z1.d, z1.d, z2.d
294-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
256+
; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
295257
; CHECK-NEXT: ret
296258
%4 = and <vscale x 2 x i64> %2, %0
297259
%5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
@@ -303,11 +265,7 @@ define <vscale x 2 x i64> @codegen_bsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x
303265
define <vscale x 2 x i64> @codegen_nbsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
304266
; CHECK-LABEL: codegen_nbsl_i64:
305267
; CHECK: // %bb.0:
306-
; CHECK-NEXT: and z0.d, z2.d, z0.d
307-
; CHECK-NEXT: bic z1.d, z1.d, z2.d
308-
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
309-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
310-
; CHECK-NEXT: eor z0.d, z0.d, z2.d
268+
; CHECK-NEXT: nbsl z0.d, z0.d, z1.d, z2.d
311269
; CHECK-NEXT: ret
312270
%4 = and <vscale x 2 x i64> %2, %0
313271
%5 = xor <vscale x 2 x i64> %2, splat (i64 -1)
@@ -320,9 +278,7 @@ define <vscale x 2 x i64> @codegen_nbsl_i64(<vscale x 2 x i64> %0, <vscale x 2 x
320278
define <vscale x 2 x i64> @codegen_bsl1n_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
321279
; CHECK-LABEL: codegen_bsl1n_i64:
322280
; CHECK: // %bb.0:
323-
; CHECK-NEXT: bic z0.d, z2.d, z0.d
324-
; CHECK-NEXT: bic z1.d, z1.d, z2.d
325-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
281+
; CHECK-NEXT: bsl1n z0.d, z0.d, z1.d, z2.d
326282
; CHECK-NEXT: ret
327283
%4 = xor <vscale x 2 x i64> %0, splat (i64 -1)
328284
%5 = and <vscale x 2 x i64> %2, %4
@@ -335,11 +291,7 @@ define <vscale x 2 x i64> @codegen_bsl1n_i64(<vscale x 2 x i64> %0, <vscale x 2
335291
define <vscale x 2 x i64> @codegen_bsl2n_i64(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1, <vscale x 2 x i64> %2) {
336292
; CHECK-LABEL: codegen_bsl2n_i64:
337293
; CHECK: // %bb.0:
338-
; CHECK-NEXT: orr z1.d, z2.d, z1.d
339-
; CHECK-NEXT: mov z3.d, #-1 // =0xffffffffffffffff
340-
; CHECK-NEXT: and z0.d, z2.d, z0.d
341-
; CHECK-NEXT: eor z1.d, z1.d, z3.d
342-
; CHECK-NEXT: orr z0.d, z0.d, z1.d
294+
; CHECK-NEXT: bsl2n z0.d, z0.d, z1.d, z2.d
343295
; CHECK-NEXT: ret
344296
%4 = and <vscale x 2 x i64> %2, %0
345297
%5 = or <vscale x 2 x i64> %2, %1

0 commit comments

Comments
 (0)