Skip to content

Commit 3cf2ac8

Browse files
authored
[AArch64][NEON] Add eor3 patterns for V64 xors (#165376)
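This patch enables the NEON EOR3 instruction to be emitted for 64-bit vectors as well. Each 64-bit operand is placed in the low half (dsub) of a 128-bit register, the 128-bit EOR3 is applied, and the low 64 bits of the result are extracted.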
1 parent 1191970 commit 3cf2ac8

File tree

2 files changed

+167
-8
lines changed

2 files changed

+167
-8
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1805,14 +1805,22 @@ def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v8i16>;
18051805
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v4i32>;
18061806
def : SHA3_pattern<EOR3, int_aarch64_crypto_eor3u, v2i64>;
18071807

1808-
class EOR3_pattern<ValueType VecTy>
1809-
: Pat<(xor (xor (VecTy V128:$Vn), (VecTy V128:$Vm)), (VecTy V128:$Va)),
1810-
(EOR3 (VecTy V128:$Vn), (VecTy V128:$Vm), (VecTy V128:$Va))>;
1811-
1812-
def : EOR3_pattern<v16i8>;
1813-
def : EOR3_pattern<v8i16>;
1814-
def : EOR3_pattern<v4i32>;
1815-
def : EOR3_pattern<v2i64>;
1808+
multiclass EOR3_pattern<ValueType Vec128Ty, ValueType Vec64Ty>{
1809+
def : Pat<(xor (xor (Vec128Ty V128:$Vn), (Vec128Ty V128:$Vm)), (Vec128Ty V128:$Va)),
1810+
(EOR3 (Vec128Ty V128:$Vn), (Vec128Ty V128:$Vm), (Vec128Ty V128:$Va))>;
1811+
def : Pat<(xor (xor (Vec64Ty V64:$Vn), (Vec64Ty V64:$Vm)), (Vec64Ty V64:$Va)),
1812+
(EXTRACT_SUBREG
1813+
(EOR3
1814+
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Vn, dsub),
1815+
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Vm, dsub),
1816+
(INSERT_SUBREG (IMPLICIT_DEF), V64:$Va, dsub)),
1817+
dsub)>;
1818+
}
1819+
1820+
defm : EOR3_pattern<v16i8, v8i8>;
1821+
defm : EOR3_pattern<v8i16, v4i16>;
1822+
defm : EOR3_pattern<v4i32, v2i32>;
1823+
defm : EOR3_pattern<v2i64, v1i64>;
18161824

18171825
class BCAX_pattern<ValueType VecTy>
18181826
: Pat<(xor (VecTy V128:$Vn), (and (VecTy V128:$Vm), (vnot (VecTy V128:$Va)))),

llvm/test/CodeGen/AArch64/eor3.ll

Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -277,3 +277,154 @@ define <2 x i64> @eor3_vnot(<2 x i64> %0, <2 x i64> %1) {
277277
ret <2 x i64> %4
278278
}
279279

280+
define <1 x i64> @eor3_1x64(<1 x i64> %0, <1 x i64> %1, <1 x i64> %2) {
281+
; SHA3-LABEL: eor3_1x64:
282+
; SHA3: // %bb.0:
283+
; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0
284+
; SHA3-NEXT: // kill: def $d2 killed $d2 def $q2
285+
; SHA3-NEXT: // kill: def $d1 killed $d1 def $q1
286+
; SHA3-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
287+
; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0
288+
; SHA3-NEXT: ret
289+
;
290+
; NOSHA3-LABEL: eor3_1x64:
291+
; NOSHA3: // %bb.0:
292+
; NOSHA3-NEXT: eor v1.8b, v1.8b, v2.8b
293+
; NOSHA3-NEXT: eor v0.8b, v1.8b, v0.8b
294+
; NOSHA3-NEXT: ret
295+
;
296+
; SVE2-LABEL: eor3_1x64:
297+
; SVE2: // %bb.0:
298+
; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
299+
; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
300+
; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
301+
; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d
302+
; SVE2-NEXT: fmov d0, d1
303+
; SVE2-NEXT: ret
304+
;
305+
; SHA3-SVE2-LABEL: eor3_1x64:
306+
; SHA3-SVE2: // %bb.0:
307+
; SHA3-SVE2-NEXT: // kill: def $d0 killed $d0 def $q0
308+
; SHA3-SVE2-NEXT: // kill: def $d2 killed $d2 def $q2
309+
; SHA3-SVE2-NEXT: // kill: def $d1 killed $d1 def $q1
310+
; SHA3-SVE2-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
311+
; SHA3-SVE2-NEXT: // kill: def $d0 killed $d0 killed $q0
312+
; SHA3-SVE2-NEXT: ret
313+
%4 = xor <1 x i64> %1, %2
314+
%5 = xor <1 x i64> %4, %0
315+
ret <1 x i64> %5
316+
}
317+
318+
define <2 x i32> @eor3_2x32(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
319+
; SHA3-LABEL: eor3_2x32:
320+
; SHA3: // %bb.0:
321+
; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0
322+
; SHA3-NEXT: // kill: def $d2 killed $d2 def $q2
323+
; SHA3-NEXT: // kill: def $d1 killed $d1 def $q1
324+
; SHA3-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
325+
; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0
326+
; SHA3-NEXT: ret
327+
;
328+
; NOSHA3-LABEL: eor3_2x32:
329+
; NOSHA3: // %bb.0:
330+
; NOSHA3-NEXT: eor v1.8b, v1.8b, v2.8b
331+
; NOSHA3-NEXT: eor v0.8b, v1.8b, v0.8b
332+
; NOSHA3-NEXT: ret
333+
;
334+
; SVE2-LABEL: eor3_2x32:
335+
; SVE2: // %bb.0:
336+
; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
337+
; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
338+
; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
339+
; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d
340+
; SVE2-NEXT: fmov d0, d1
341+
; SVE2-NEXT: ret
342+
;
343+
; SHA3-SVE2-LABEL: eor3_2x32:
344+
; SHA3-SVE2: // %bb.0:
345+
; SHA3-SVE2-NEXT: // kill: def $d0 killed $d0 def $q0
346+
; SHA3-SVE2-NEXT: // kill: def $d2 killed $d2 def $q2
347+
; SHA3-SVE2-NEXT: // kill: def $d1 killed $d1 def $q1
348+
; SHA3-SVE2-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
349+
; SHA3-SVE2-NEXT: // kill: def $d0 killed $d0 killed $q0
350+
; SHA3-SVE2-NEXT: ret
351+
%4 = xor <2 x i32> %1, %2
352+
%5 = xor <2 x i32> %4, %0
353+
ret <2 x i32> %5
354+
}
355+
356+
define <4 x i16> @eor3_4x16(<4 x i16> %0, <4 x i16> %1, <4 x i16> %2) {
357+
; SHA3-LABEL: eor3_4x16:
358+
; SHA3: // %bb.0:
359+
; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0
360+
; SHA3-NEXT: // kill: def $d2 killed $d2 def $q2
361+
; SHA3-NEXT: // kill: def $d1 killed $d1 def $q1
362+
; SHA3-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
363+
; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0
364+
; SHA3-NEXT: ret
365+
;
366+
; NOSHA3-LABEL: eor3_4x16:
367+
; NOSHA3: // %bb.0:
368+
; NOSHA3-NEXT: eor v1.8b, v1.8b, v2.8b
369+
; NOSHA3-NEXT: eor v0.8b, v1.8b, v0.8b
370+
; NOSHA3-NEXT: ret
371+
;
372+
; SVE2-LABEL: eor3_4x16:
373+
; SVE2: // %bb.0:
374+
; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
375+
; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
376+
; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
377+
; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d
378+
; SVE2-NEXT: fmov d0, d1
379+
; SVE2-NEXT: ret
380+
;
381+
; SHA3-SVE2-LABEL: eor3_4x16:
382+
; SHA3-SVE2: // %bb.0:
383+
; SHA3-SVE2-NEXT: // kill: def $d0 killed $d0 def $q0
384+
; SHA3-SVE2-NEXT: // kill: def $d2 killed $d2 def $q2
385+
; SHA3-SVE2-NEXT: // kill: def $d1 killed $d1 def $q1
386+
; SHA3-SVE2-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
387+
; SHA3-SVE2-NEXT: // kill: def $d0 killed $d0 killed $q0
388+
; SHA3-SVE2-NEXT: ret
389+
%4 = xor <4 x i16> %1, %2
390+
%5 = xor <4 x i16> %4, %0
391+
ret <4 x i16> %5
392+
}
393+
394+
define <8 x i8> @eor3_8x8(<8 x i8> %0, <8 x i8> %1, <8 x i8> %2) {
395+
; SHA3-LABEL: eor3_8x8:
396+
; SHA3: // %bb.0:
397+
; SHA3-NEXT: // kill: def $d0 killed $d0 def $q0
398+
; SHA3-NEXT: // kill: def $d2 killed $d2 def $q2
399+
; SHA3-NEXT: // kill: def $d1 killed $d1 def $q1
400+
; SHA3-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
401+
; SHA3-NEXT: // kill: def $d0 killed $d0 killed $q0
402+
; SHA3-NEXT: ret
403+
;
404+
; NOSHA3-LABEL: eor3_8x8:
405+
; NOSHA3: // %bb.0:
406+
; NOSHA3-NEXT: eor v1.8b, v1.8b, v2.8b
407+
; NOSHA3-NEXT: eor v0.8b, v1.8b, v0.8b
408+
; NOSHA3-NEXT: ret
409+
;
410+
; SVE2-LABEL: eor3_8x8:
411+
; SVE2: // %bb.0:
412+
; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1
413+
; SVE2-NEXT: // kill: def $d2 killed $d2 def $z2
414+
; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0
415+
; SVE2-NEXT: eor3 z1.d, z1.d, z2.d, z0.d
416+
; SVE2-NEXT: fmov d0, d1
417+
; SVE2-NEXT: ret
418+
;
419+
; SHA3-SVE2-LABEL: eor3_8x8:
420+
; SHA3-SVE2: // %bb.0:
421+
; SHA3-SVE2-NEXT: // kill: def $d0 killed $d0 def $q0
422+
; SHA3-SVE2-NEXT: // kill: def $d2 killed $d2 def $q2
423+
; SHA3-SVE2-NEXT: // kill: def $d1 killed $d1 def $q1
424+
; SHA3-SVE2-NEXT: eor3 v0.16b, v1.16b, v2.16b, v0.16b
425+
; SHA3-SVE2-NEXT: // kill: def $d0 killed $d0 killed $q0
426+
; SHA3-SVE2-NEXT: ret
427+
%4 = xor <8 x i8> %1, %2
428+
%5 = xor <8 x i8> %4, %0
429+
ret <8 x i8> %5
430+
}

0 commit comments

Comments
 (0)