Skip to content

Commit 4c71a98

Browse files
committed
[X86][APX] Prevent emitting push2/pop2 when stack alignment < 16 bytes
push2/pop2 require 16-byte stack alignment. If the stack alignment is smaller than that, push2/pop2 should not be emitted.
1 parent ea709c7 commit 4c71a98

File tree

2 files changed

+210
-1
lines changed

2 files changed

+210
-1
lines changed

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2921,7 +2921,7 @@ bool X86FrameLowering::assignCalleeSavedSpillSlots(
29212921
// 3. When the number of CSR pushes is even, start using push2 from the 1st
29222922
// push and make the stack 16B-aligned before the push
29232923
unsigned NumRegsForPush2 = 0;
2924-
if (STI.hasPush2Pop2()) {
2924+
if (STI.hasPush2Pop2() && getStackAlignment() >= 16) {
29252925
unsigned NumCSGPR = llvm::count_if(CSI, [](const CalleeSavedInfo &I) {
29262926
return X86::GR64RegClass.contains(I.getReg());
29272927
});
Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+push2pop2 | FileCheck %s --check-prefix=CHECK
3+
4+
; This test checks that no push2/pop2 is emitted when the stack alignment is
5+
; set to a value below the 16 bytes required by the push2/pop2 instructions.
6+
; Here it is set to 8 bytes.
7+
8+
define void @csr1() nounwind {
9+
; CHECK-LABEL: csr1:
10+
; CHECK: # %bb.0: # %entry
11+
; CHECK-NEXT: pushq %rbp
12+
; CHECK-NEXT: #APP
13+
; CHECK-NEXT: #NO_APP
14+
; CHECK-NEXT: popq %rbp
15+
; CHECK-NEXT: retq
16+
entry:
17+
tail call void asm sideeffect "", "~{rbp},~{dirflag},~{fpsr},~{flags}"()
18+
ret void
19+
}
20+
21+
define void @csr2() nounwind {
22+
; CHECK-LABEL: csr2:
23+
; CHECK: # %bb.0: # %entry
24+
; CHECK-NEXT: pushq %rbp
25+
; CHECK-NEXT: pushq %r15
26+
; CHECK-NEXT: #APP
27+
; CHECK-NEXT: #NO_APP
28+
; CHECK-NEXT: popq %r15
29+
; CHECK-NEXT: popq %rbp
30+
; CHECK-NEXT: retq
31+
entry:
32+
tail call void asm sideeffect "", "~{rbp},~{r15},~{dirflag},~{fpsr},~{flags}"()
33+
ret void
34+
}
35+
36+
define void @csr3() nounwind {
37+
; CHECK-LABEL: csr3:
38+
; CHECK: # %bb.0: # %entry
39+
; CHECK-NEXT: pushq %rbp
40+
; CHECK-NEXT: pushq %r15
41+
; CHECK-NEXT: pushq %r14
42+
; CHECK-NEXT: #APP
43+
; CHECK-NEXT: #NO_APP
44+
; CHECK-NEXT: popq %r14
45+
; CHECK-NEXT: popq %r15
46+
; CHECK-NEXT: popq %rbp
47+
; CHECK-NEXT: retq
48+
entry:
49+
tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{dirflag},~{fpsr},~{flags}"()
50+
ret void
51+
}
52+
53+
define void @csr4() nounwind {
54+
; CHECK-LABEL: csr4:
55+
; CHECK: # %bb.0: # %entry
56+
; CHECK-NEXT: pushq %rbp
57+
; CHECK-NEXT: pushq %r15
58+
; CHECK-NEXT: pushq %r14
59+
; CHECK-NEXT: pushq %r13
60+
; CHECK-NEXT: #APP
61+
; CHECK-NEXT: #NO_APP
62+
; CHECK-NEXT: popq %r13
63+
; CHECK-NEXT: popq %r14
64+
; CHECK-NEXT: popq %r15
65+
; CHECK-NEXT: popq %rbp
66+
; CHECK-NEXT: retq
67+
entry:
68+
tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{dirflag},~{fpsr},~{flags}"()
69+
ret void
70+
}
71+
72+
define void @csr5() nounwind {
73+
; CHECK-LABEL: csr5:
74+
; CHECK: # %bb.0: # %entry
75+
; CHECK-NEXT: pushq %rbp
76+
; CHECK-NEXT: pushq %r15
77+
; CHECK-NEXT: pushq %r14
78+
; CHECK-NEXT: pushq %r13
79+
; CHECK-NEXT: pushq %r12
80+
; CHECK-NEXT: #APP
81+
; CHECK-NEXT: #NO_APP
82+
; CHECK-NEXT: popq %r12
83+
; CHECK-NEXT: popq %r13
84+
; CHECK-NEXT: popq %r14
85+
; CHECK-NEXT: popq %r15
86+
; CHECK-NEXT: popq %rbp
87+
; CHECK-NEXT: retq
88+
entry:
89+
tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{r12},~{dirflag},~{fpsr},~{flags}"()
90+
ret void
91+
}
92+
93+
define void @csr6() nounwind {
94+
; CHECK-LABEL: csr6:
95+
; CHECK: # %bb.0: # %entry
96+
; CHECK-NEXT: pushq %rbp
97+
; CHECK-NEXT: pushq %r15
98+
; CHECK-NEXT: pushq %r14
99+
; CHECK-NEXT: pushq %r13
100+
; CHECK-NEXT: pushq %r12
101+
; CHECK-NEXT: pushq %rbx
102+
; CHECK-NEXT: #APP
103+
; CHECK-NEXT: #NO_APP
104+
; CHECK-NEXT: popq %rbx
105+
; CHECK-NEXT: popq %r12
106+
; CHECK-NEXT: popq %r13
107+
; CHECK-NEXT: popq %r14
108+
; CHECK-NEXT: popq %r15
109+
; CHECK-NEXT: popq %rbp
110+
; CHECK-NEXT: retq
111+
entry:
112+
tail call void asm sideeffect "", "~{rbp},~{r15},~{r14},~{r13},~{r12},~{rbx},~{dirflag},~{fpsr},~{flags}"()
113+
ret void
114+
}
115+
116+
declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg)
117+
118+
define void @lea_in_epilog(i1 %arg, ptr %arg1, ptr %arg2, i64 %arg3, i64 %arg4, i64 %arg5, i64 %arg6, i64 %arg7, i64 %arg8, i64 %arg9, i64 %arg10) nounwind {
119+
; CHECK-LABEL: lea_in_epilog:
120+
; CHECK: # %bb.0: # %bb
121+
; CHECK-NEXT: testb $1, %dil
122+
; CHECK-NEXT: je .LBB6_5
123+
; CHECK-NEXT: # %bb.1: # %bb13
124+
; CHECK-NEXT: pushq %rbp
125+
; CHECK-NEXT: pushq %r15
126+
; CHECK-NEXT: pushq %r14
127+
; CHECK-NEXT: pushq %r13
128+
; CHECK-NEXT: pushq %r12
129+
; CHECK-NEXT: pushq %rbx
130+
; CHECK-NEXT: subq $16, %rsp
131+
; CHECK-NEXT: movq %r9, %r14
132+
; CHECK-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
133+
; CHECK-NEXT: addq {{[0-9]+}}(%rsp), %r14
134+
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r13
135+
; CHECK-NEXT: addq %r14, %r13
136+
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %r15
137+
; CHECK-NEXT: addq %r14, %r15
138+
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rbx
139+
; CHECK-NEXT: addq %r14, %rbx
140+
; CHECK-NEXT: xorl %ebp, %ebp
141+
; CHECK-NEXT: xorl %r12d, %r12d
142+
; CHECK-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
143+
; CHECK-NEXT: .p2align 4
144+
; CHECK-NEXT: .LBB6_2: # %bb15
145+
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
146+
; CHECK-NEXT: incq %r12
147+
; CHECK-NEXT: movl $432, %edx # imm = 0x1B0
148+
; CHECK-NEXT: xorl %edi, %edi
149+
; CHECK-NEXT: movq %r15, %rsi
150+
; CHECK-NEXT: callq memcpy@PLT
151+
; CHECK-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Reload
152+
; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax
153+
; CHECK-NEXT: addq %rax, %r13
154+
; CHECK-NEXT: addq %rax, %r15
155+
; CHECK-NEXT: addq %rax, %rbx
156+
; CHECK-NEXT: addq %rax, %r14
157+
; CHECK-NEXT: addq $8, %rbp
158+
; CHECK-NEXT: testb $1, %dil
159+
; CHECK-NEXT: je .LBB6_2
160+
; CHECK-NEXT: # %bb.3: # %bb11
161+
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
162+
; CHECK-NEXT: leaq {{[0-9]+}}(%rsp), %rsp
163+
; CHECK-NEXT: popq %rbx
164+
; CHECK-NEXT: popq %r12
165+
; CHECK-NEXT: popq %r13
166+
; CHECK-NEXT: popq %r14
167+
; CHECK-NEXT: popq %r15
168+
; CHECK-NEXT: popq %rbp
169+
; CHECK-NEXT: jne .LBB6_5
170+
; CHECK-NEXT: # %bb.4: # %bb12
171+
; CHECK-NEXT: movq $0, (%rax)
172+
; CHECK-NEXT: .LBB6_5: # %bb14
173+
; CHECK-NEXT: retq
174+
bb:
175+
br i1 %arg, label %bb13, label %bb14
176+
177+
bb11:
178+
br i1 %arg, label %bb14, label %bb12
179+
180+
bb12:
181+
store double 0.000000e+00, ptr %arg1, align 8
182+
br label %bb14
183+
184+
bb13:
185+
%getelementptr = getelementptr i8, ptr null, i64 %arg5
186+
br label %bb15
187+
188+
bb14:
189+
ret void
190+
191+
bb15:
192+
%phi = phi i64 [ 0, %bb13 ], [ %add, %bb15 ]
193+
%getelementptr16 = getelementptr double, ptr null, i64 %phi
194+
%add = add i64 %phi, 1
195+
%mul = mul i64 %arg6, %add
196+
%getelementptr17 = getelementptr i8, ptr %getelementptr, i64 %mul
197+
call void @llvm.memcpy.p0.p0.i64(ptr %getelementptr16, ptr %getelementptr17, i64 0, i1 false)
198+
%getelementptr18 = getelementptr i8, ptr %getelementptr17, i64 %arg7
199+
%getelementptr19 = getelementptr i8, ptr %getelementptr17, i64 %arg8
200+
call void @llvm.memcpy.p0.p0.i64(ptr null, ptr %getelementptr19, i64 0, i1 false)
201+
%getelementptr20 = getelementptr i8, ptr %getelementptr17, i64 %arg9
202+
call void @llvm.memcpy.p0.p0.i64(ptr null, ptr %getelementptr20, i64 432, i1 false)
203+
%getelementptr21 = getelementptr i8, ptr %getelementptr17, i64 %arg10
204+
call void @llvm.memcpy.p0.p0.i64(ptr null, ptr %getelementptr21, i64 0, i1 false)
205+
br i1 %arg, label %bb11, label %bb15
206+
}
207+
208+
!llvm.module.flags = !{!0}
209+
!0 = !{i32 1, !"override-stack-alignment", i32 8}

0 commit comments

Comments
 (0)