Skip to content

Commit b1500b2

Browse files
Use pseudo instructions and add new tests
1 parent f60975b commit b1500b2

File tree

3 files changed

+263
-39
lines changed

3 files changed

+263
-39
lines changed

llvm/lib/Target/X86/X86InstrInfo.cpp

Lines changed: 5 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -10809,39 +10809,27 @@ void X86InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
1080910809
if (!ST.hasSSE1())
1081010810
return;
1081110811

10812-
// PXOR is safe to use because it doesn't affect flags.
10813-
BuildMI(MBB, Iter, DL, get(X86::PXORrr), Reg)
10814-
.addReg(Reg, RegState::Undef)
10815-
.addReg(Reg, RegState::Undef);
10812+
BuildMI(MBB, Iter, DL, get(X86::V_SET0), Reg);
1081610813
} else if (X86::VR256RegClass.contains(Reg)) {
1081710814
// YMM#
1081810815
if (!ST.hasAVX())
1081910816
return;
1082010817

10821-
// VPXORY is safe to use because it doesn't affect flags.
10822-
BuildMI(MBB, Iter, DL, get(X86::VPXORYrr), Reg)
10823-
.addReg(Reg, RegState::Undef)
10824-
.addReg(Reg, RegState::Undef);
10818+
BuildMI(MBB, Iter, DL, get(X86::AVX_SET0), Reg);
1082510819
} else if (X86::VR512RegClass.contains(Reg)) {
1082610820
// ZMM#
1082710821
if (!ST.hasAVX512())
1082810822
return;
1082910823

10830-
// VPXORDZ is safe to use because it doesn't affect flags.
10831-
BuildMI(MBB, Iter, DL, get(X86::VPXORDZrr), Reg)
10832-
.addReg(Reg, RegState::Undef)
10833-
.addReg(Reg, RegState::Undef);
10824+
BuildMI(MBB, Iter, DL, get(X86::AVX512_512_SET0), Reg);
1083410825
} else if (X86::VK1RegClass.contains(Reg) || X86::VK2RegClass.contains(Reg) ||
1083510826
X86::VK4RegClass.contains(Reg) || X86::VK8RegClass.contains(Reg) ||
1083610827
X86::VK16RegClass.contains(Reg)) {
1083710828
if (!ST.hasVLX())
1083810829
return;
1083910830

10840-
// KXOR is safe to use because it doesn't affect flags.
10841-
unsigned Op = ST.hasBWI() ? X86::KXORQkk : X86::KXORWkk;
10842-
BuildMI(MBB, Iter, DL, get(Op), Reg)
10843-
.addReg(Reg, RegState::Undef)
10844-
.addReg(Reg, RegState::Undef);
10831+
unsigned Op = ST.hasBWI() ? X86::KSET0Q : X86::KSET0W;
10832+
BuildMI(MBB, Iter, DL, get(Op), Reg);
1084510833
}
1084610834
}
1084710835

llvm/test/CodeGen/X86/pr163053.ll

Lines changed: 0 additions & 22 deletions
This file was deleted.
Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefixes=SSE
3+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx -verify-machineinstrs | FileCheck %s --check-prefixes=AVX
4+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 -verify-machineinstrs | FileCheck %s --check-prefixes=AVX2
5+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512VL
6+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl,+avx512bw -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512BW
7+
8+
define void @zero_xmm(<4 x i32> %arg) #0 {
9+
; SSE-LABEL: zero_xmm:
10+
; SSE: # %bb.0:
11+
; SSE-NEXT: movaps %xmm0, 0
12+
; SSE-NEXT: xorps %xmm0, %xmm0
13+
; SSE-NEXT: retq
14+
;
15+
; AVX-LABEL: zero_xmm:
16+
; AVX: # %bb.0:
17+
; AVX-NEXT: vmovaps %xmm0, 0
18+
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
19+
; AVX-NEXT: retq
20+
;
21+
; AVX2-LABEL: zero_xmm:
22+
; AVX2: # %bb.0:
23+
; AVX2-NEXT: vmovaps %xmm0, 0
24+
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
25+
; AVX2-NEXT: retq
26+
;
27+
; AVX512VL-LABEL: zero_xmm:
28+
; AVX512VL: # %bb.0:
29+
; AVX512VL-NEXT: vmovaps %xmm0, 0
30+
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
31+
; AVX512VL-NEXT: retq
32+
;
33+
; AVX512BW-LABEL: zero_xmm:
34+
; AVX512BW: # %bb.0:
35+
; AVX512BW-NEXT: vmovaps %xmm0, 0
36+
; AVX512BW-NEXT: vxorps %xmm0, %xmm0, %xmm0
37+
; AVX512BW-NEXT: retq
38+
store <4 x i32> %arg, ptr null, align 32
39+
ret void
40+
}
41+
42+
define void @zero_ymm(<8 x i32> %arg) #0 {
43+
; SSE-LABEL: zero_ymm:
44+
; SSE: # %bb.0:
45+
; SSE-NEXT: movaps %xmm1, 16
46+
; SSE-NEXT: movaps %xmm0, 0
47+
; SSE-NEXT: xorps %xmm0, %xmm0
48+
; SSE-NEXT: xorps %xmm1, %xmm1
49+
; SSE-NEXT: retq
50+
;
51+
; AVX-LABEL: zero_ymm:
52+
; AVX: # %bb.0:
53+
; AVX-NEXT: vmovaps %ymm0, 0
54+
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
55+
; AVX-NEXT: vzeroupper
56+
; AVX-NEXT: retq
57+
;
58+
; AVX2-LABEL: zero_ymm:
59+
; AVX2: # %bb.0:
60+
; AVX2-NEXT: vmovaps %ymm0, 0
61+
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
62+
; AVX2-NEXT: vzeroupper
63+
; AVX2-NEXT: retq
64+
;
65+
; AVX512VL-LABEL: zero_ymm:
66+
; AVX512VL: # %bb.0:
67+
; AVX512VL-NEXT: vmovaps %ymm0, 0
68+
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
69+
; AVX512VL-NEXT: vzeroupper
70+
; AVX512VL-NEXT: retq
71+
;
72+
; AVX512BW-LABEL: zero_ymm:
73+
; AVX512BW: # %bb.0:
74+
; AVX512BW-NEXT: vmovaps %ymm0, 0
75+
; AVX512BW-NEXT: vxorps %xmm0, %xmm0, %xmm0
76+
; AVX512BW-NEXT: vzeroupper
77+
; AVX512BW-NEXT: retq
78+
store <8 x i32> %arg, ptr null, align 32
79+
ret void
80+
}
81+
82+
define void @zero_zmm(<16 x i32> %arg) #0 {
83+
; SSE-LABEL: zero_zmm:
84+
; SSE: # %bb.0:
85+
; SSE-NEXT: movaps %xmm3, 48
86+
; SSE-NEXT: movaps %xmm2, 32
87+
; SSE-NEXT: movaps %xmm1, 16
88+
; SSE-NEXT: movaps %xmm0, 0
89+
; SSE-NEXT: xorps %xmm0, %xmm0
90+
; SSE-NEXT: xorps %xmm1, %xmm1
91+
; SSE-NEXT: xorps %xmm2, %xmm2
92+
; SSE-NEXT: xorps %xmm3, %xmm3
93+
; SSE-NEXT: retq
94+
;
95+
; AVX-LABEL: zero_zmm:
96+
; AVX: # %bb.0:
97+
; AVX-NEXT: vmovaps %ymm1, 32
98+
; AVX-NEXT: vmovaps %ymm0, 0
99+
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
100+
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
101+
; AVX-NEXT: vzeroupper
102+
; AVX-NEXT: retq
103+
;
104+
; AVX2-LABEL: zero_zmm:
105+
; AVX2: # %bb.0:
106+
; AVX2-NEXT: vmovaps %ymm1, 32
107+
; AVX2-NEXT: vmovaps %ymm0, 0
108+
; AVX2-NEXT: vxorps %xmm0, %xmm0, %xmm0
109+
; AVX2-NEXT: vxorps %xmm1, %xmm1, %xmm1
110+
; AVX2-NEXT: vzeroupper
111+
; AVX2-NEXT: retq
112+
;
113+
; AVX512VL-LABEL: zero_zmm:
114+
; AVX512VL: # %bb.0:
115+
; AVX512VL-NEXT: vmovups %zmm0, 0
116+
; AVX512VL-NEXT: vxorps %xmm0, %xmm0, %xmm0
117+
; AVX512VL-NEXT: vzeroupper
118+
; AVX512VL-NEXT: retq
119+
;
120+
; AVX512BW-LABEL: zero_zmm:
121+
; AVX512BW: # %bb.0:
122+
; AVX512BW-NEXT: vmovups %zmm0, 0
123+
; AVX512BW-NEXT: vxorps %xmm0, %xmm0, %xmm0
124+
; AVX512BW-NEXT: vzeroupper
125+
; AVX512BW-NEXT: retq
126+
store <16 x i32> %arg, ptr null, align 32
127+
ret void
128+
}
129+
130+
define void @zero_k(<8 x i32> %arg, <8 x i1> %mask) #0 {
131+
; SSE-LABEL: zero_k:
132+
; SSE: # %bb.0:
133+
; SSE-NEXT: psllw $15, %xmm2
134+
; SSE-NEXT: packsswb %xmm2, %xmm2
135+
; SSE-NEXT: pmovmskb %xmm2, %eax
136+
; SSE-NEXT: testb $1, %al
137+
; SSE-NEXT: jne .LBB3_1
138+
; SSE-NEXT: # %bb.2: # %else
139+
; SSE-NEXT: testb $2, %al
140+
; SSE-NEXT: jne .LBB3_3
141+
; SSE-NEXT: .LBB3_4: # %else2
142+
; SSE-NEXT: testb $4, %al
143+
; SSE-NEXT: jne .LBB3_5
144+
; SSE-NEXT: .LBB3_6: # %else4
145+
; SSE-NEXT: testb $8, %al
146+
; SSE-NEXT: jne .LBB3_7
147+
; SSE-NEXT: .LBB3_8: # %else6
148+
; SSE-NEXT: testb $16, %al
149+
; SSE-NEXT: jne .LBB3_9
150+
; SSE-NEXT: .LBB3_10: # %else8
151+
; SSE-NEXT: testb $32, %al
152+
; SSE-NEXT: jne .LBB3_11
153+
; SSE-NEXT: .LBB3_12: # %else10
154+
; SSE-NEXT: testb $64, %al
155+
; SSE-NEXT: jne .LBB3_13
156+
; SSE-NEXT: .LBB3_14: # %else12
157+
; SSE-NEXT: testb $-128, %al
158+
; SSE-NEXT: je .LBB3_16
159+
; SSE-NEXT: .LBB3_15: # %cond.store13
160+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[3,3,3,3]
161+
; SSE-NEXT: movd %xmm0, 28
162+
; SSE-NEXT: .LBB3_16: # %else14
163+
; SSE-NEXT: xorl %eax, %eax
164+
; SSE-NEXT: pxor %xmm0, %xmm0
165+
; SSE-NEXT: pxor %xmm1, %xmm1
166+
; SSE-NEXT: pxor %xmm2, %xmm2
167+
; SSE-NEXT: retq
168+
; SSE-NEXT: .LBB3_1: # %cond.store
169+
; SSE-NEXT: movd %xmm0, 0
170+
; SSE-NEXT: testb $2, %al
171+
; SSE-NEXT: je .LBB3_4
172+
; SSE-NEXT: .LBB3_3: # %cond.store1
173+
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1]
174+
; SSE-NEXT: movd %xmm2, 4
175+
; SSE-NEXT: testb $4, %al
176+
; SSE-NEXT: je .LBB3_6
177+
; SSE-NEXT: .LBB3_5: # %cond.store3
178+
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3]
179+
; SSE-NEXT: movd %xmm2, 8
180+
; SSE-NEXT: testb $8, %al
181+
; SSE-NEXT: je .LBB3_8
182+
; SSE-NEXT: .LBB3_7: # %cond.store5
183+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3]
184+
; SSE-NEXT: movd %xmm0, 12
185+
; SSE-NEXT: testb $16, %al
186+
; SSE-NEXT: je .LBB3_10
187+
; SSE-NEXT: .LBB3_9: # %cond.store7
188+
; SSE-NEXT: movd %xmm1, 16
189+
; SSE-NEXT: testb $32, %al
190+
; SSE-NEXT: je .LBB3_12
191+
; SSE-NEXT: .LBB3_11: # %cond.store9
192+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,1]
193+
; SSE-NEXT: movd %xmm0, 20
194+
; SSE-NEXT: testb $64, %al
195+
; SSE-NEXT: je .LBB3_14
196+
; SSE-NEXT: .LBB3_13: # %cond.store11
197+
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
198+
; SSE-NEXT: movd %xmm0, 24
199+
; SSE-NEXT: testb $-128, %al
200+
; SSE-NEXT: jne .LBB3_15
201+
; SSE-NEXT: jmp .LBB3_16
202+
;
203+
; AVX-LABEL: zero_k:
204+
; AVX: # %bb.0:
205+
; AVX-NEXT: vpmovzxwd {{.*#+}} xmm2 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero
206+
; AVX-NEXT: vpslld $31, %xmm2, %xmm2
207+
; AVX-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
208+
; AVX-NEXT: vpslld $31, %xmm1, %xmm1
209+
; AVX-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
210+
; AVX-NEXT: vmaskmovps %ymm0, %ymm1, 0
211+
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
212+
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
213+
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
214+
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
215+
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
216+
; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2
217+
; AVX-NEXT: vzeroupper
218+
; AVX-NEXT: retq
219+
;
220+
; AVX2-LABEL: zero_k:
221+
; AVX2: # %bb.0:
222+
; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
223+
; AVX2-NEXT: vpslld $31, %ymm1, %ymm1
224+
; AVX2-NEXT: vpmaskmovd %ymm0, %ymm1, 0
225+
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
226+
; AVX2-NEXT: vpxor %xmm0, %xmm0, %xmm0
227+
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
228+
; AVX2-NEXT: vzeroupper
229+
; AVX2-NEXT: retq
230+
;
231+
; AVX512VL-LABEL: zero_k:
232+
; AVX512VL: # %bb.0:
233+
; AVX512VL-NEXT: vpmovsxwd %xmm1, %ymm1
234+
; AVX512VL-NEXT: vpslld $31, %ymm1, %ymm1
235+
; AVX512VL-NEXT: vptestmd %ymm1, %ymm1, %k1
236+
; AVX512VL-NEXT: vmovdqa32 %ymm0, 0 {%k1}
237+
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
238+
; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0
239+
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
240+
; AVX512VL-NEXT: kxorw %k0, %k0, %k1
241+
; AVX512VL-NEXT: vzeroupper
242+
; AVX512VL-NEXT: retq
243+
;
244+
; AVX512BW-LABEL: zero_k:
245+
; AVX512BW: # %bb.0:
246+
; AVX512BW-NEXT: vpsllw $15, %xmm1, %xmm1
247+
; AVX512BW-NEXT: vpmovw2m %xmm1, %k1
248+
; AVX512BW-NEXT: vmovdqa32 %ymm0, 0 {%k1}
249+
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
250+
; AVX512BW-NEXT: vpxor %xmm0, %xmm0, %xmm0
251+
; AVX512BW-NEXT: kxorq %k0, %k0, %k1
252+
; AVX512BW-NEXT: vzeroupper
253+
; AVX512BW-NEXT: retq
254+
tail call void @llvm.masked.store.v8i32.p0(<8 x i32> %arg, ptr null, i32 32, <8 x i1> %mask)
255+
ret void
256+
}
257+
258+
attributes #0 = { "zero-call-used-regs"="used" }

0 commit comments

Comments
 (0)