Skip to content

Commit 46785cb

Browse files
committed
Add tests.
1 parent d1fe7a2 commit 46785cb

File tree

3 files changed

+196
-1
lines changed

3 files changed

+196
-1
lines changed

llvm/test/CodeGen/AArch64/bsl.ll

Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -431,3 +431,88 @@ define <4 x i8> @bsl2n_v4i8(<4 x i8> %0, <4 x i8> %1, <4 x i8> %2) {
431431
%7 = or <4 x i8> %4, %6
432432
ret <4 x i8> %7
433433
}
434+
435+
; NOT (a) has a dedicated instruction (MVN).
; The IR below XORs the input with an all-ones splat; both the NEON and SVE2
; check prefixes expect that to be emitted as a single MVN on v0.
436+
define <2 x i64> @not_q(<2 x i64> %0) #0 {
437+
; NEON-LABEL: not_q:
438+
; NEON: // %bb.0:
439+
; NEON-NEXT: mvn v0.16b, v0.16b
440+
; NEON-NEXT: ret
441+
;
442+
; SVE2-LABEL: not_q:
443+
; SVE2: // %bb.0:
444+
; SVE2-NEXT: mvn v0.16b, v0.16b
445+
; SVE2-NEXT: ret
446+
%2 = xor <2 x i64> %0, splat (i64 -1)
447+
ret <2 x i64> %2
448+
}
449+
450+
; NAND (a, b) = NBSL (a, b, b) = NBSL (b, a, a).
; The IR computes (b & a) and then XORs the result with an all-ones splat.
; Both the NEON and SVE2 check prefixes currently expect AND followed by MVN,
; i.e. no NBSL is selected for this fixed-width case.
451+
define <2 x i64> @nand_q(<2 x i64> %0, <2 x i64> %1) #0 {
452+
; NEON-LABEL: nand_q:
453+
; NEON: // %bb.0:
454+
; NEON-NEXT: and v0.16b, v1.16b, v0.16b
455+
; NEON-NEXT: mvn v0.16b, v0.16b
456+
; NEON-NEXT: ret
457+
;
458+
; SVE2-LABEL: nand_q:
459+
; SVE2: // %bb.0:
460+
; SVE2-NEXT: and v0.16b, v1.16b, v0.16b
461+
; SVE2-NEXT: mvn v0.16b, v0.16b
462+
; SVE2-NEXT: ret
463+
%3 = and <2 x i64> %1, %0
464+
%4 = xor <2 x i64> %3, splat (i64 -1)
465+
ret <2 x i64> %4
466+
}
467+
468+
; NOR (a, b) = NBSL (a, b, a) = NBSL (b, a, b).
; The IR computes (b | a) and then XORs the result with an all-ones splat.
; Both the NEON and SVE2 check prefixes currently expect ORR followed by MVN,
; i.e. no NBSL is selected for this fixed-width case.
469+
define <2 x i64> @nor_q(<2 x i64> %0, <2 x i64> %1) #0 {
470+
; NEON-LABEL: nor_q:
471+
; NEON: // %bb.0:
472+
; NEON-NEXT: orr v0.16b, v1.16b, v0.16b
473+
; NEON-NEXT: mvn v0.16b, v0.16b
474+
; NEON-NEXT: ret
475+
;
476+
; SVE2-LABEL: nor_q:
477+
; SVE2: // %bb.0:
478+
; SVE2-NEXT: orr v0.16b, v1.16b, v0.16b
479+
; SVE2-NEXT: mvn v0.16b, v0.16b
480+
; SVE2-NEXT: ret
481+
%3 = or <2 x i64> %1, %0
482+
%4 = xor <2 x i64> %3, splat (i64 -1)
483+
ret <2 x i64> %4
484+
}
485+
486+
; EON (a, b) = BSL2N (a, a, b) = BSL2N (b, b, a).
; The IR computes (a ^ b) and then XORs the result with an all-ones splat.
; Both the NEON and SVE2 check prefixes currently expect EOR followed by MVN,
; i.e. no BSL2N is selected for this fixed-width case.
487+
define <2 x i64> @eon_q(<2 x i64> %0, <2 x i64> %1) #0 {
488+
; NEON-LABEL: eon_q:
489+
; NEON: // %bb.0:
490+
; NEON-NEXT: eor v0.16b, v0.16b, v1.16b
491+
; NEON-NEXT: mvn v0.16b, v0.16b
492+
; NEON-NEXT: ret
493+
;
494+
; SVE2-LABEL: eon_q:
495+
; SVE2: // %bb.0:
496+
; SVE2-NEXT: eor v0.16b, v0.16b, v1.16b
497+
; SVE2-NEXT: mvn v0.16b, v0.16b
498+
; SVE2-NEXT: ret
499+
%3 = xor <2 x i64> %0, %1
500+
%4 = xor <2 x i64> %3, splat (i64 -1)
501+
ret <2 x i64> %4
502+
}
503+
504+
; ORN (a, b) has a dedicated instruction (ORN).
; The IR inverts b by XORing it with an all-ones splat and ORs that with a.
; Both the NEON and SVE2 check prefixes expect this to fold into a single ORN.
505+
define <2 x i64> @orn_q(<2 x i64> %0, <2 x i64> %1) #0 {
506+
; NEON-LABEL: orn_q:
507+
; NEON: // %bb.0:
508+
; NEON-NEXT: orn v0.16b, v0.16b, v1.16b
509+
; NEON-NEXT: ret
510+
;
511+
; SVE2-LABEL: orn_q:
512+
; SVE2: // %bb.0:
513+
; SVE2-NEXT: orn v0.16b, v0.16b, v1.16b
514+
; SVE2-NEXT: ret
515+
%3 = xor <2 x i64> %1, splat (i64 -1)
516+
%4 = or <2 x i64> %0, %3
517+
ret <2 x i64> %4
518+
}

llvm/test/CodeGen/AArch64/eor3.ll

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; RUN: llc -mtriple=aarch64 -mattr=+sha3 < %s | FileCheck --check-prefix=SHA3 %s
33
; RUN: llc -mtriple=aarch64 -mattr=-sha3 < %s | FileCheck --check-prefix=NOSHA3 %s
44
; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s | FileCheck --check-prefix=SVE2 %s
5-
; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 < %s | FileCheck --check-prefix=SHA3 %s
5+
; RUN: llc -mtriple=aarch64 -mattr=+sha3,+sve2 < %s | FileCheck --check-prefix=SHA3-SVE2 %s
66

77
define <16 x i8> @eor3_16x8_left(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
88
; SHA3-LABEL: eor3_16x8_left:
@@ -24,6 +24,11 @@ define <16 x i8> @eor3_16x8_left(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
2424
; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d
2525
; SVE2-NEXT: mov v0.16b, v2.16b
2626
; SVE2-NEXT: ret
27+
;
28+
; SHA3-SVE2-LABEL: eor3_16x8_left:
29+
; SHA3-SVE2: // %bb.0:
30+
; SHA3-SVE2-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
31+
; SHA3-SVE2-NEXT: ret
2732
%4 = xor <16 x i8> %0, %1
2833
%5 = xor <16 x i8> %2, %4
2934
ret <16 x i8> %5
@@ -49,6 +54,11 @@ define <16 x i8> @eor3_16x8_right(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2) {
4954
; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d
5055
; SVE2-NEXT: mov v0.16b, v1.16b
5156
; SVE2-NEXT: ret
57+
;
58+
; SHA3-SVE2-LABEL: eor3_16x8_right:
59+
; SHA3-SVE2: // %bb.0:
60+
; SHA3-SVE2-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
61+
; SHA3-SVE2-NEXT: ret
5262
%4 = xor <16 x i8> %1, %2
5363
%5 = xor <16 x i8> %4, %0
5464
ret <16 x i8> %5
@@ -74,6 +84,11 @@ define <8 x i16> @eor3_8x16_left(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
7484
; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d
7585
; SVE2-NEXT: mov v0.16b, v2.16b
7686
; SVE2-NEXT: ret
87+
;
88+
; SHA3-SVE2-LABEL: eor3_8x16_left:
89+
; SHA3-SVE2: // %bb.0:
90+
; SHA3-SVE2-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
91+
; SHA3-SVE2-NEXT: ret
7792
%4 = xor <8 x i16> %0, %1
7893
%5 = xor <8 x i16> %2, %4
7994
ret <8 x i16> %5
@@ -99,6 +114,11 @@ define <8 x i16> @eor3_8x16_right(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2) {
99114
; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d
100115
; SVE2-NEXT: mov v0.16b, v1.16b
101116
; SVE2-NEXT: ret
117+
;
118+
; SHA3-SVE2-LABEL: eor3_8x16_right:
119+
; SHA3-SVE2: // %bb.0:
120+
; SHA3-SVE2-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
121+
; SHA3-SVE2-NEXT: ret
102122
%4 = xor <8 x i16> %1, %2
103123
%5 = xor <8 x i16> %4, %0
104124
ret <8 x i16> %5
@@ -124,6 +144,11 @@ define <4 x i32> @eor3_4x32_left(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
124144
; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d
125145
; SVE2-NEXT: mov v0.16b, v2.16b
126146
; SVE2-NEXT: ret
147+
;
148+
; SHA3-SVE2-LABEL: eor3_4x32_left:
149+
; SHA3-SVE2: // %bb.0:
150+
; SHA3-SVE2-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
151+
; SHA3-SVE2-NEXT: ret
127152
%4 = xor <4 x i32> %0, %1
128153
%5 = xor <4 x i32> %2, %4
129154
ret <4 x i32> %5
@@ -149,6 +174,11 @@ define <4 x i32> @eor3_4x32_right(<4 x i32> %0, <4 x i32> %1, <4 x i32> %2) {
149174
; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d
150175
; SVE2-NEXT: mov v0.16b, v1.16b
151176
; SVE2-NEXT: ret
177+
;
178+
; SHA3-SVE2-LABEL: eor3_4x32_right:
179+
; SHA3-SVE2: // %bb.0:
180+
; SHA3-SVE2-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
181+
; SHA3-SVE2-NEXT: ret
152182
%4 = xor <4 x i32> %1, %2
153183
%5 = xor <4 x i32> %4, %0
154184
ret <4 x i32> %5
@@ -174,6 +204,11 @@ define <2 x i64> @eor3_2x64_left(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
174204
; SVE2-NEXT: eor3 z2.d, z2.d, z0.d, z1.d
175205
; SVE2-NEXT: mov v0.16b, v2.16b
176206
; SVE2-NEXT: ret
207+
;
208+
; SHA3-SVE2-LABEL: eor3_2x64_left:
209+
; SHA3-SVE2: // %bb.0:
210+
; SHA3-SVE2-NEXT: eor3 v0.16b, v0.16b, v1.16b, v2.16b
211+
; SHA3-SVE2-NEXT: ret
177212
%4 = xor <2 x i64> %0, %1
178213
%5 = xor <2 x i64> %2, %4
179214
ret <2 x i64> %5
@@ -199,6 +234,11 @@ define <2 x i64> @eor3_2x64_right(<2 x i64> %0, <2 x i64> %1, <2 x i64> %2) {
199234
; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d
200235
; SVE2-NEXT: mov v0.16b, v1.16b
201236
; SVE2-NEXT: ret
237+
;
238+
; SHA3-SVE2-LABEL: eor3_2x64_right:
239+
; SHA3-SVE2: // %bb.0:
240+
; SHA3-SVE2-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
241+
; SHA3-SVE2-NEXT: ret
202242
%4 = xor <2 x i64> %1, %2
203243
%5 = xor <2 x i64> %4, %0
204244
ret <2 x i64> %5
@@ -222,6 +262,12 @@ define <2 x i64> @eor3_vnot(<2 x i64> %0, <2 x i64> %1) {
222262
; SVE2-NEXT: eor v0.16b, v0.16b, v1.16b
223263
; SVE2-NEXT: mvn v0.16b, v0.16b
224264
; SVE2-NEXT: ret
265+
;
266+
; SHA3-SVE2-LABEL: eor3_vnot:
267+
; SHA3-SVE2: // %bb.0:
268+
; SHA3-SVE2-NEXT: eor v0.16b, v0.16b, v1.16b
269+
; SHA3-SVE2-NEXT: mvn v0.16b, v0.16b
270+
; SHA3-SVE2-NEXT: ret
225271
%3 = xor <2 x i64> %0, <i64 -1, i64 -1>
226272
%4 = xor <2 x i64> %3, %1
227273
ret <2 x i64> %4

llvm/test/CodeGen/AArch64/sve2-bsl.ll

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,3 +312,67 @@ entry:
312312
%t3 = xor <vscale x 4 x i32> %t2, %b
313313
ret <vscale x 4 x i32> %t3
314314
}
315+
316+
; NOT (a) = NBSL (a, a, a).
317+
; We don't have a pattern for this right now because the tied register
318+
; constraint can lead to worse code gen.
; The checks below therefore expect the all-ones vector to be materialised
; with MOV into z1 and then XORed into the input with EOR.
319+
define <vscale x 2 x i64> @not(<vscale x 2 x i64> %0) #0 {
320+
; CHECK-LABEL: not:
321+
; CHECK: // %bb.0:
322+
; CHECK-NEXT: mov z1.d, #-1 // =0xffffffffffffffff
323+
; CHECK-NEXT: eor z0.d, z0.d, z1.d
324+
; CHECK-NEXT: ret
325+
%2 = xor <vscale x 2 x i64> %0, splat (i64 -1)
326+
ret <vscale x 2 x i64> %2
327+
}
328+
329+
; NAND (a, b) = NBSL (a, b, b) = NBSL (b, a, a).
; The IR computes (b & a) and then XORs the result with an all-ones splat.
; The checks below currently expect MOV #-1, AND, then EOR with the all-ones
; vector, i.e. no NBSL is selected.
330+
define <vscale x 2 x i64> @nand(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
331+
; CHECK-LABEL: nand:
332+
; CHECK: // %bb.0:
333+
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
334+
; CHECK-NEXT: and z0.d, z1.d, z0.d
335+
; CHECK-NEXT: eor z0.d, z0.d, z2.d
336+
; CHECK-NEXT: ret
337+
%3 = and <vscale x 2 x i64> %1, %0
338+
%4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
339+
ret <vscale x 2 x i64> %4
340+
}
341+
342+
; NOR (a, b) = NBSL (a, b, a) = NBSL (b, a, b).
; The IR computes (b | a) and then XORs the result with an all-ones splat.
; The checks below currently expect MOV #-1, ORR, then EOR with the all-ones
; vector, i.e. no NBSL is selected.
343+
define <vscale x 2 x i64> @nor(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
344+
; CHECK-LABEL: nor:
345+
; CHECK: // %bb.0:
346+
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
347+
; CHECK-NEXT: orr z0.d, z1.d, z0.d
348+
; CHECK-NEXT: eor z0.d, z0.d, z2.d
349+
; CHECK-NEXT: ret
350+
%3 = or <vscale x 2 x i64> %1, %0
351+
%4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
352+
ret <vscale x 2 x i64> %4
353+
}
354+
355+
; EON (a, b) = BSL2N (a, a, b) = BSL2N (b, b, a).
; The IR computes (a ^ b) and then XORs the result with an all-ones splat.
; The checks below currently expect MOV #-1 followed by one EOR3 that combines
; a, b and the all-ones vector, i.e. no BSL2N is selected.
356+
define <vscale x 2 x i64> @eon(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
357+
; CHECK-LABEL: eon:
358+
; CHECK: // %bb.0:
359+
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
360+
; CHECK-NEXT: eor3 z0.d, z0.d, z1.d, z2.d
361+
; CHECK-NEXT: ret
362+
%3 = xor <vscale x 2 x i64> %0, %1
363+
%4 = xor <vscale x 2 x i64> %3, splat (i64 -1)
364+
ret <vscale x 2 x i64> %4
365+
}
366+
367+
; ORN (a, b) = BSL2N (a, b, a).
; The IR inverts b by XORing it with an all-ones splat and ORs that with a.
; The checks below currently expect MOV #-1, EOR on b, then ORR with a,
; i.e. no BSL2N is selected.
368+
define <vscale x 2 x i64> @orn(<vscale x 2 x i64> %0, <vscale x 2 x i64> %1) #0 {
369+
; CHECK-LABEL: orn:
370+
; CHECK: // %bb.0:
371+
; CHECK-NEXT: mov z2.d, #-1 // =0xffffffffffffffff
372+
; CHECK-NEXT: eor z1.d, z1.d, z2.d
373+
; CHECK-NEXT: orr z0.d, z0.d, z1.d
374+
; CHECK-NEXT: ret
375+
%3 = xor <vscale x 2 x i64> %1, splat (i64 -1)
376+
%4 = or <vscale x 2 x i64> %0, %3
377+
ret <vscale x 2 x i64> %4
378+
}

0 commit comments

Comments
 (0)