Skip to content

Commit 4af268c

Browse files
committed
[X86] Show failure to fold freeze(gfni()) -> gfni(freeze(),freeze()) for all gfni instructions
1 parent c3470d1 commit 4af268c

File tree

1 file changed

+104
-0
lines changed

1 file changed

+104
-0
lines changed

llvm/test/CodeGen/X86/combine-gfni.ll

Lines changed: 104 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,104 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni | FileCheck %s --check-prefixes=SSE
3+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+gfni,+avx | FileCheck %s --check-prefixes=AVX
4+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+gfni,+avx512bw | FileCheck %s --check-prefixes=AVX512
5+
6+
; Test: freeze applied to the result of the 128-bit gf2p8affineqb intrinsic.
; %a1 is passed as both vector operands of the intrinsic, with immediate 11.
; Lanes where %a2 is negative (signed compare against zero) receive the frozen
; intrinsic result; all other lanes receive %a0.
; The expected-assembly lines inside this function are autogenerated (see the
; NOTE at the top of the file); regenerate them with the script rather than
; editing them by hand.
define <16 x i8> @gf2p8affineqb_freeze(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
7+
; SSE-LABEL: gf2p8affineqb_freeze:
8+
; SSE: # %bb.0:
9+
; SSE-NEXT: pxor %xmm3, %xmm3
10+
; SSE-NEXT: pcmpgtb %xmm2, %xmm3
11+
; SSE-NEXT: gf2p8affineqb $11, %xmm1, %xmm1
12+
; SSE-NEXT: pand %xmm3, %xmm1
13+
; SSE-NEXT: pandn %xmm0, %xmm3
14+
; SSE-NEXT: por %xmm1, %xmm3
15+
; SSE-NEXT: movdqa %xmm3, %xmm0
16+
; SSE-NEXT: retq
17+
;
18+
; AVX-LABEL: gf2p8affineqb_freeze:
19+
; AVX: # %bb.0:
20+
; AVX-NEXT: vgf2p8affineqb $11, %xmm1, %xmm1, %xmm1
21+
; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
22+
; AVX-NEXT: retq
23+
;
24+
; AVX512-LABEL: gf2p8affineqb_freeze:
25+
; AVX512: # %bb.0:
26+
; AVX512-NEXT: vpmovb2m %xmm2, %k1
27+
; AVX512-NEXT: vgf2p8affineqb $11, %xmm1, %xmm1, %xmm1
28+
; AVX512-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
29+
; AVX512-NEXT: retq
30+
; Per-lane mask: true where the signed byte in %a2 is below zero.
%i = icmp slt <16 x i8> %a2, zeroinitializer
31+
%g = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %a1, <16 x i8> %a1, i8 11)
32+
; The freeze sits between the intrinsic call and its only user, the select.
%f = freeze <16 x i8> %g
33+
%r = select <16 x i1> %i, <16 x i8> %f, <16 x i8> %a0
34+
ret <16 x i8> %r
35+
}
36+
37+
; Test: freeze applied to the result of the 128-bit gf2p8affineinvqb intrinsic.
; %a1 is passed as both vector operands of the intrinsic, with immediate 11.
; Lanes where %a2 is negative (signed compare against zero) receive the frozen
; intrinsic result; all other lanes receive %a0.
; The expected-assembly lines inside this function are autogenerated (see the
; NOTE at the top of the file); regenerate them with the script rather than
; editing them by hand.
define <16 x i8> @gf2p8affineinvqb_freeze(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
38+
; SSE-LABEL: gf2p8affineinvqb_freeze:
39+
; SSE: # %bb.0:
40+
; SSE-NEXT: pxor %xmm3, %xmm3
41+
; SSE-NEXT: pcmpgtb %xmm2, %xmm3
42+
; SSE-NEXT: gf2p8affineinvqb $11, %xmm1, %xmm1
43+
; SSE-NEXT: pand %xmm3, %xmm1
44+
; SSE-NEXT: pandn %xmm0, %xmm3
45+
; SSE-NEXT: por %xmm1, %xmm3
46+
; SSE-NEXT: movdqa %xmm3, %xmm0
47+
; SSE-NEXT: retq
48+
;
49+
; AVX-LABEL: gf2p8affineinvqb_freeze:
50+
; AVX: # %bb.0:
51+
; AVX-NEXT: vgf2p8affineinvqb $11, %xmm1, %xmm1, %xmm1
52+
; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
53+
; AVX-NEXT: retq
54+
;
55+
; AVX512-LABEL: gf2p8affineinvqb_freeze:
56+
; AVX512: # %bb.0:
57+
; AVX512-NEXT: vpmovb2m %xmm2, %k1
58+
; AVX512-NEXT: vgf2p8affineinvqb $11, %xmm1, %xmm1, %xmm1
59+
; AVX512-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
60+
; AVX512-NEXT: retq
61+
; Per-lane mask: true where the signed byte in %a2 is below zero.
%i = icmp slt <16 x i8> %a2, zeroinitializer
62+
%g = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %a1, <16 x i8> %a1, i8 11)
63+
; The freeze sits between the intrinsic call and its only user, the select.
%f = freeze <16 x i8> %g
64+
%r = select <16 x i1> %i, <16 x i8> %f, <16 x i8> %a0
65+
ret <16 x i8> %r
66+
}
67+
68+
; Test: freeze applied to the result of the 128-bit gf2p8mulb intrinsic.
; %a1 is passed as both vector operands of the intrinsic (this intrinsic takes
; no immediate operand).
; Lanes where %a2 is negative (signed compare against zero) receive the frozen
; intrinsic result; all other lanes receive %a0.
; The expected-assembly lines inside this function are autogenerated (see the
; NOTE at the top of the file); regenerate them with the script rather than
; editing them by hand.
define <16 x i8> @gf2p8mulb_freeze(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %a2) {
69+
; SSE-LABEL: gf2p8mulb_freeze:
70+
; SSE: # %bb.0:
71+
; SSE-NEXT: pxor %xmm3, %xmm3
72+
; SSE-NEXT: pcmpgtb %xmm2, %xmm3
73+
; SSE-NEXT: gf2p8mulb %xmm1, %xmm1
74+
; SSE-NEXT: pand %xmm3, %xmm1
75+
; SSE-NEXT: pandn %xmm0, %xmm3
76+
; SSE-NEXT: por %xmm1, %xmm3
77+
; SSE-NEXT: movdqa %xmm3, %xmm0
78+
; SSE-NEXT: retq
79+
;
80+
; AVX-LABEL: gf2p8mulb_freeze:
81+
; AVX: # %bb.0:
82+
; AVX-NEXT: vgf2p8mulb %xmm1, %xmm1, %xmm1
83+
; AVX-NEXT: vpblendvb %xmm2, %xmm1, %xmm0, %xmm0
84+
; AVX-NEXT: retq
85+
;
86+
; AVX512-LABEL: gf2p8mulb_freeze:
87+
; AVX512: # %bb.0:
88+
; AVX512-NEXT: vpmovb2m %xmm2, %k1
89+
; AVX512-NEXT: vgf2p8mulb %xmm1, %xmm1, %xmm1
90+
; AVX512-NEXT: vmovdqu8 %xmm1, %xmm0 {%k1}
91+
; AVX512-NEXT: retq
92+
; Per-lane mask: true where the signed byte in %a2 is below zero.
%i = icmp slt <16 x i8> %a2, zeroinitializer
93+
%g = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %a1, <16 x i8> %a1)
94+
; The freeze sits between the intrinsic call and its only user, the select.
%f = freeze <16 x i8> %g
95+
%r = select <16 x i1> %i, <16 x i8> %f, <16 x i8> %a0
96+
ret <16 x i8> %r
97+
}
98+
99+
; Declarations of the GFNI intrinsics, in 128-bit and 256-bit vector widths.
; The two affine intrinsics take two byte vectors plus an i8 operand; the
; multiply intrinsic takes two byte vectors only.
; NOTE(review): only the .128 variants are called by the functions visible in
; this section; the .256 declarations have no caller here. Presumably they are
; kept for wider-vector tests; confirm before removing.
declare <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8>, <16 x i8>, i8)
100+
declare <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8>, <32 x i8>, i8)
101+
declare <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8>, <16 x i8>, i8)
102+
declare <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8>, <32 x i8>, i8)
103+
declare <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8>, <16 x i8>)
104+
declare <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8>, <32 x i8>)

0 commit comments

Comments
 (0)