Skip to content

Commit 1d1e52e

Browse files
authored
[win][x64] Allow push/pop for stack alloc when unwind v2 is required (#153621)
While attempting to enable Windows x64 unwind v2, compilation failed with the following error:

```
fatal error: error in backend: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function '<redacted>': Cannot pop registers before the stack allocation has been deallocated
```

I traced this down to an optimization in `X86FrameLowering`: <https://github.com/llvm/llvm-project/blob/6961139ce9154d03c88b8d46c8742a1eaa569cd9/llvm/lib/Target/X86/X86FrameLowering.cpp#L324-L340>

Technically, using `push`/`pop` to adjust the stack is permitted under unwind v2: the requirement for a "canonical" epilog is that the stack is fully adjusted before the registers listed as pushed in the unwind table are popped. So, as long as the `.seh_unwindv2start` pseudo is after the pops that adjust the stack, then everything will work correctly.

One other side effect of this change is that the stack is now allowed to be adjusted across multiple instructions, which would be needed for extremely large stack frames.
1 parent 08ff017 commit 1d1e52e

File tree

3 files changed

+272
-66
lines changed

3 files changed

+272
-66
lines changed

llvm/lib/Target/X86/X86WinEHUnwindV2.cpp

Lines changed: 38 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -201,15 +201,11 @@ bool X86WinEHUnwindV2::runOnMachineFunction(MachineFunction &MF) {
201201
"The epilog is deallocating a stack "
202202
"allocation, but the prolog did "
203203
"not allocate one");
204-
if (HasStackDealloc)
204+
if (PoppedRegCount > 0)
205205
return rejectCurrentFunctionInternalError(
206206
MF, Mode,
207-
"The epilog is deallocating the stack "
208-
"allocation more than once");
209-
if (PoppedRegCount > 0)
210-
llvm_unreachable(
211-
"Should have raised an error: either popping before "
212-
"deallocating or deallocating without an allocation");
207+
"The epilog is deallocating a stack allocation after popping "
208+
"registers");
213209

214210
HasStackDealloc = true;
215211
} else if (State == FunctionState::FinishedEpilog)
@@ -219,33 +215,41 @@ bool X86WinEHUnwindV2::runOnMachineFunction(MachineFunction &MF) {
219215

220216
case X86::POP64r:
221217
if (State == FunctionState::InEpilog) {
222-
// After the stack pointer has been adjusted, the epilog must
223-
// POP each register in reverse order of the PUSHes in the prolog.
224-
PoppedRegCount++;
225-
if (HasStackAlloc != HasStackDealloc)
226-
return rejectCurrentFunctionInternalError(
227-
MF, Mode,
228-
"Cannot pop registers before the stack "
229-
"allocation has been deallocated");
230-
if (PoppedRegCount > PushedRegs.size())
231-
return rejectCurrentFunctionInternalError(
232-
MF, Mode,
233-
"The epilog is popping more registers than the prolog pushed");
234-
if (PushedRegs[PushedRegs.size() - PoppedRegCount] !=
235-
MI.getOperand(0).getReg())
236-
return rejectCurrentFunctionInternalError(
237-
MF, Mode,
238-
"The epilog is popping a registers in "
239-
"a different order than the "
240-
"prolog pushed them");
241-
242-
// Unwind v2 records the size of the epilog not from where we place
243-
// SEH_BeginEpilogue (as that contains the instruction to adjust the
244-
// stack pointer) but from the first POP instruction (if there is
245-
// one).
246-
if (!UnwindV2StartLocation) {
247-
assert(PoppedRegCount == 1);
248-
UnwindV2StartLocation = &MI;
218+
Register Reg = MI.getOperand(0).getReg();
219+
if (HasStackAlloc && (PoppedRegCount == 0) &&
220+
!llvm::is_contained(PushedRegs, Reg)) {
221+
// If this is a pop that doesn't correspond to the set of pushed
222+
// registers, then assume it was used to adjust the stack pointer.
223+
HasStackDealloc = true;
224+
} else {
225+
// After the stack pointer has been adjusted, the epilog must
226+
// POP each register in reverse order of the PUSHes in the prolog.
227+
PoppedRegCount++;
228+
if (HasStackAlloc != HasStackDealloc)
229+
return rejectCurrentFunctionInternalError(
230+
MF, Mode,
231+
"Cannot pop registers before the stack "
232+
"allocation has been deallocated");
233+
if (PoppedRegCount > PushedRegs.size())
234+
return rejectCurrentFunctionInternalError(
235+
MF, Mode,
236+
"The epilog is popping more registers than the prolog "
237+
"pushed");
238+
if (PushedRegs[PushedRegs.size() - PoppedRegCount] != Reg)
239+
return rejectCurrentFunctionInternalError(
240+
MF, Mode,
241+
"The epilog is popping a registers in "
242+
"a different order than the "
243+
"prolog pushed them");
244+
245+
// Unwind v2 records the size of the epilog not from where we place
246+
// SEH_BeginEpilogue (as that contains the instruction to adjust the
247+
// stack pointer) but from the first POP instruction (if there is
248+
// one).
249+
if (!UnwindV2StartLocation) {
250+
assert(PoppedRegCount == 1);
251+
UnwindV2StartLocation = &MI;
252+
}
249253
}
250254
} else if (State == FunctionState::FinishedEpilog)
251255
// Unexpected instruction after the epilog.

llvm/test/CodeGen/X86/win64-eh-unwindv2-errors.mir

Lines changed: 35 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -97,38 +97,6 @@ body: |
9797
RET64
9898
...
9999

100-
;--- double_dealloc.mir
101-
# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - %t/double_dealloc.mir \
102-
# RUN: -run-pass=x86-wineh-unwindv2 2>&1 | FileCheck %s \
103-
# RUN: --check-prefix=DOUBLE-DEALLOC
104-
# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %t/double_dealloc.mir \
105-
# RUN: -run-pass=x86-wineh-unwindv2 -x86-wineh-unwindv2-force-mode=1 | \
106-
# RUN: FileCheck %s --check-prefix=BESTEFFORT
107-
# DOUBLE-DEALLOC: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'double_dealloc':
108-
# DOUBLE-DEALLOC-SAME: The epilog is deallocating the stack allocation more than once
109-
110-
--- |
111-
define dso_local void @double_dealloc() local_unnamed_addr {
112-
entry:
113-
ret void
114-
}
115-
!llvm.module.flags = !{!0}
116-
!0 = !{i32 1, !"winx64-eh-unwindv2", i32 2}
117-
...
118-
---
119-
name: double_dealloc
120-
body: |
121-
bb.0.entry:
122-
$rsp = frame-setup SUB64ri32 $rsp, 40, implicit-def dead $eflags
123-
frame-setup SEH_StackAlloc 40
124-
frame-setup SEH_EndPrologue
125-
SEH_BeginEpilogue
126-
$rsp = frame-destroy ADD64ri32 $rsp, 40, implicit-def dead $eflags
127-
$rsp = frame-destroy ADD64ri32 $rsp, 40, implicit-def dead $eflags
128-
SEH_EndEpilogue
129-
RET64
130-
...
131-
132100
;--- dealloc_after_epilog.mir
133101
# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \
134102
# RUN: %t/dealloc_after_epilog.mir -run-pass=x86-wineh-unwindv2 2>&1 | \
@@ -316,3 +284,38 @@ body: |
316284
$ecx = MOV32rr killed $eax
317285
RET64
318286
...
287+
288+
;--- dealloc_pop_dealloc.mir
289+
# RUN: not --crash llc -mtriple=x86_64-pc-windows-msvc -o - \
290+
# RUN: %t/dealloc_pop_dealloc.mir -run-pass=x86-wineh-unwindv2 2>&1 | \
291+
# RUN: FileCheck %s --check-prefix=DEALLOC-POP-DEALLOC
292+
# RUN: llc -mtriple=x86_64-pc-windows-msvc -o - %t/dealloc_pop_dealloc.mir \
293+
# RUN: -run-pass=x86-wineh-unwindv2 -x86-wineh-unwindv2-force-mode=1 | \
294+
# RUN: FileCheck %s --check-prefix=BESTEFFORT
295+
# DEALLOC-POP-DEALLOC: LLVM ERROR: Windows x64 Unwind v2 is required, but LLVM has generated incompatible code in function 'dealloc_pop_dealloc':
296+
# DEALLOC-POP-DEALLOC-SAME: The epilog is deallocating a stack allocation after popping registers
297+
298+
--- |
299+
define dso_local void @dealloc_pop_dealloc() local_unnamed_addr {
300+
entry:
301+
ret void
302+
}
303+
!llvm.module.flags = !{!0}
304+
!0 = !{i32 1, !"winx64-eh-unwindv2", i32 2}
305+
...
306+
---
307+
name: dealloc_pop_dealloc
308+
body: |
309+
bb.0.entry:
310+
frame-setup PUSH64r killed $rdi, implicit-def $rsp, implicit $rsp
311+
frame-setup SEH_PushReg 55
312+
$rsp = frame-setup SUB64ri32 $rsp, 40, implicit-def dead $eflags
313+
frame-setup SEH_StackAlloc 40
314+
frame-setup SEH_EndPrologue
315+
SEH_BeginEpilogue
316+
$rsp = frame-destroy ADD64ri32 $rsp, 20, implicit-def dead $eflags
317+
$rdi = frame-destroy POP64r implicit-def $rsp, implicit $rsp
318+
$rsp = frame-destroy ADD64ri32 $rsp, 20, implicit-def dead $eflags
319+
SEH_EndEpilogue
320+
RET64
321+
...
Lines changed: 199 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
# RUN: llc -o - %s -mtriple=x86_64-unknown-windows-msvc \
2+
# RUN: -run-pass=x86-wineh-unwindv2 | FileCheck %s
3+
4+
# Regression test for Win x64 unwind v2: in some cases it is better to use
5+
# push+pop to adjust the stack, rather than sub+add. This is permitted with
6+
# unwind v2 as the requirement is that the epilog finishes adjusting the stack
7+
# before popping the registers listed in the unwind table.
8+
9+
# Pushes and pops the same register.
10+
# CHECK-LABEL: name: push_pop_same
11+
# CHECK: body:
12+
# CHECK-NEXT: bb.0
13+
# CHECK-NEXT: SEH_UnwindVersion 2
14+
# CHECK-NEXT: frame-setup PUSH64r undef $rax
15+
# CHECK-NEXT: frame-setup SEH_StackAlloc 8
16+
# CHECK-NEXT: frame-setup SEH_EndPrologue
17+
# CHECK-NEXT: SEH_BeginEpilogue
18+
# CHECK-NEXT: $rax = frame-destroy
19+
# CHECK-NEXT: SEH_UnwindV2Start
20+
# CHECK-NEXT: SEH_EndEpilogue
21+
# CHECK-NEXT: RET64
22+
23+
# Pushes and pops a different register.
24+
# CHECK-LABEL: name: push_pop_different
25+
# CHECK: body:
26+
# CHECK-NEXT: bb.0
27+
# CHECK-NEXT: SEH_UnwindVersion 2
28+
# CHECK-NEXT: frame-setup PUSH64r undef $rax
29+
# CHECK-NEXT: frame-setup SEH_StackAlloc 8
30+
# CHECK-NEXT: frame-setup SEH_EndPrologue
31+
# CHECK: SEH_BeginEpilogue
32+
# CHECK-NEXT: $rcx = frame-destroy POP64r
33+
# CHECK-NEXT: SEH_UnwindV2Start
34+
# CHECK-NEXT: SEH_EndEpilogue
35+
# CHECK-NEXT: RET64 $eax
36+
37+
# Pushes in the prolog, adds in the epilog.
38+
# CHECK-LABEL: name: push_add
39+
# CHECK: body:
40+
# CHECK-NEXT: bb.0
41+
# CHECK-NEXT: SEH_UnwindVersion 2
42+
# CHECK-NEXT: frame-setup PUSH64r killed $r15
43+
# CHECK-NEXT: frame-setup SEH_PushReg 126
44+
# CHECK-NEXT: frame-setup PUSH64r killed $r14
45+
# CHECK-NEXT: frame-setup SEH_PushReg 125
46+
# CHECK-NEXT: frame-setup PUSH64r killed $rsi
47+
# CHECK-NEXT: frame-setup SEH_PushReg 60
48+
# CHECK-NEXT: frame-setup PUSH64r killed $rdi
49+
# CHECK-NEXT: frame-setup SEH_PushReg 55
50+
# CHECK-NEXT: frame-setup PUSH64r killed $rbx
51+
# CHECK-NEXT: frame-setup SEH_PushReg 53
52+
# CHECK-NEXT: frame-setup PUSH64r undef $rax
53+
# CHECK-NEXT: frame-setup SEH_StackAlloc 8
54+
# CHECK-NEXT: frame-setup SEH_EndPrologue
55+
# CHECK: SEH_BeginEpilogue
56+
# CHECK-NEXT: $rsp = frame-destroy ADD64ri32 $rsp, 8
57+
# CHECK-NEXT: SEH_UnwindV2Start
58+
# CHECK-NEXT: $rbx = frame-destroy POP64r
59+
# CHECK-NEXT: $rdi = frame-destroy POP64r
60+
# CHECK-NEXT: $rsi = frame-destroy POP64r
61+
# CHECK-NEXT: $r14 = frame-destroy POP64r
62+
# CHECK-NEXT: $r15 = frame-destroy POP64r
63+
# CHECK-NEXT: SEH_EndEpilogue
64+
# CHECK-NEXT: RET64
65+
66+
--- |
67+
define void @push_pop_same() {
68+
%small_alloca = alloca i32, align 4
69+
ret void
70+
}
71+
72+
define i32 @push_pop_different(i32 %x, i32 %y) {
73+
%small_alloca = alloca i32, align 4
74+
%sum = add i32 %x, %y
75+
ret i32 %sum
76+
}
77+
78+
define void @push_add(ptr %a, ptr %b, ptr %out) {
79+
%small_alloca = alloca i32, align 4
80+
%av = load i256, ptr %a, align 16
81+
%bv = load i256, ptr %b, align 16
82+
%r = mul i256 %av, %bv
83+
store i256 %r, ptr %out, align 16
84+
ret void
85+
}
86+
87+
!llvm.module.flags = !{!0}
88+
89+
!0 = !{i32 1, !"winx64-eh-unwindv2", i32 2}
90+
...
91+
---
92+
name: push_pop_same
93+
body: |
94+
bb.0 (%ir-block.0):
95+
frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp
96+
frame-setup SEH_StackAlloc 8
97+
frame-setup SEH_EndPrologue
98+
SEH_BeginEpilogue
99+
$rax = frame-destroy POP64r implicit-def $rsp, implicit $rsp
100+
SEH_EndEpilogue
101+
RET64
102+
...
103+
---
104+
name: push_pop_different
105+
body: |
106+
bb.0 (%ir-block.0):
107+
frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp
108+
frame-setup SEH_StackAlloc 8
109+
frame-setup SEH_EndPrologue
110+
renamable $edx = KILL $edx, implicit-def $rdx
111+
renamable $ecx = KILL $ecx, implicit-def $rcx
112+
renamable $eax = LEA64_32r killed renamable $rcx, 1, killed renamable $rdx, 0, $noreg
113+
SEH_BeginEpilogue
114+
$rcx = frame-destroy POP64r implicit-def $rsp, implicit $rsp
115+
SEH_EndEpilogue
116+
RET64 $eax
117+
...
118+
---
119+
name: push_add
120+
body: |
121+
bb.0 (%ir-block.0):
122+
123+
frame-setup PUSH64r killed $r15, implicit-def $rsp, implicit $rsp
124+
frame-setup SEH_PushReg 126
125+
frame-setup PUSH64r killed $r14, implicit-def $rsp, implicit $rsp
126+
frame-setup SEH_PushReg 125
127+
frame-setup PUSH64r killed $rsi, implicit-def $rsp, implicit $rsp
128+
frame-setup SEH_PushReg 60
129+
frame-setup PUSH64r killed $rdi, implicit-def $rsp, implicit $rsp
130+
frame-setup SEH_PushReg 55
131+
frame-setup PUSH64r killed $rbx, implicit-def $rsp, implicit $rsp
132+
frame-setup SEH_PushReg 53
133+
frame-setup PUSH64r undef $rax, implicit-def $rsp, implicit $rsp
134+
frame-setup SEH_StackAlloc 8
135+
frame-setup SEH_EndPrologue
136+
$rsi = MOV64rr $rdx
137+
renamable $r9 = MOV64rm renamable $rcx, 1, $noreg, 0, $noreg :: (load (s64) from %ir.a, align 16)
138+
renamable $rdi = MOV64rm renamable $rcx, 1, $noreg, 8, $noreg :: (load (s64) from %ir.a + 8, basealign 16)
139+
renamable $rbx = MOV64rm renamable $rcx, 1, $noreg, 16, $noreg :: (load (s64) from %ir.a + 16, align 16)
140+
renamable $r10 = MOV64rm $rdx, 1, $noreg, 16, $noreg :: (load (s64) from %ir.b + 16, align 16)
141+
renamable $r11 = MOV64rm $rdx, 1, $noreg, 0, $noreg :: (load (s64) from %ir.b, align 16)
142+
renamable $r14 = MOV64rm $rdx, 1, $noreg, 8, $noreg :: (load (s64) from %ir.b + 8, basealign 16)
143+
renamable $r15 = MOV64rm killed renamable $rcx, 1, $noreg, 24, $noreg :: (load (s64) from %ir.a + 24, basealign 16)
144+
renamable $r15 = IMUL64rr killed renamable $r15, renamable $r11, implicit-def dead $eflags
145+
$rax = MOV64rr $r11
146+
MUL64r renamable $rbx, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax
147+
$rcx = MOV64rr $rax
148+
renamable $rbx = IMUL64rr killed renamable $rbx, renamable $r14, implicit-def dead $eflags
149+
renamable $rbx = ADD64rr killed renamable $rbx, killed renamable $rdx, implicit-def dead $eflags
150+
renamable $rbx = ADD64rr killed renamable $rbx, killed renamable $r15, implicit-def dead $eflags
151+
$r15 = MOV64rr $r10
152+
renamable $r15 = IMUL64rr killed renamable $r15, renamable $rdi, implicit-def dead $eflags
153+
$rax = MOV64rr killed $r10
154+
MUL64r renamable $r9, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax
155+
$r10 = MOV64rr $rax
156+
renamable $rdx = ADD64rr killed renamable $rdx, killed renamable $r15, implicit-def dead $eflags
157+
renamable $r15 = MOV64rm killed renamable $rsi, 1, $noreg, 24, $noreg :: (load (s64) from %ir.b + 24, basealign 16)
158+
renamable $r15 = IMUL64rr killed renamable $r15, renamable $r9, implicit-def dead $eflags
159+
renamable $r15 = ADD64rr killed renamable $r15, killed renamable $rdx, implicit-def dead $eflags
160+
renamable $r10 = ADD64rr killed renamable $r10, killed renamable $rcx, implicit-def $eflags
161+
renamable $r15 = ADC64rr killed renamable $r15, killed renamable $rbx, implicit-def dead $eflags, implicit killed $eflags
162+
$rax = MOV64rr $r9
163+
MUL64r renamable $r11, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax
164+
$rcx = MOV64rr $rdx
165+
$rsi = MOV64rr $rax
166+
$rax = MOV64rr $rdi
167+
MUL64r killed renamable $r11, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax
168+
$r11 = MOV64rr $rdx
169+
$rbx = MOV64rr $rax
170+
renamable $rbx = ADD64rr killed renamable $rbx, killed renamable $rcx, implicit-def $eflags
171+
renamable $r11 = ADC64ri32 killed renamable $r11, 0, implicit-def dead $eflags, implicit killed $eflags
172+
$rax = MOV64rr killed $r9
173+
MUL64r renamable $r14, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax
174+
$rcx = MOV64rr $rdx
175+
$r9 = MOV64rr $rax
176+
renamable $r9 = ADD64rr killed renamable $r9, killed renamable $rbx, implicit-def $eflags
177+
renamable $rcx = ADC64rr killed renamable $rcx, killed renamable $r11, implicit-def $eflags, implicit killed $eflags
178+
renamable $al = SETCCr 2, implicit killed $eflags
179+
renamable $r11d = MOVZX32rr8 killed renamable $al, implicit-def $r11
180+
$rax = MOV64rr killed $rdi
181+
MUL64r killed renamable $r14, implicit-def $rax, implicit-def $rdx, implicit-def dead $eflags, implicit $rax
182+
renamable $rax = ADD64rr killed renamable $rax, killed renamable $rcx, implicit-def $eflags
183+
renamable $rdx = ADC64rr killed renamable $rdx, killed renamable $r11, implicit-def dead $eflags, implicit killed $eflags
184+
renamable $rax = ADD64rr killed renamable $rax, killed renamable $r10, implicit-def $eflags
185+
renamable $rdx = ADC64rr killed renamable $rdx, killed renamable $r15, implicit-def dead $eflags, implicit killed $eflags
186+
MOV64mr renamable $r8, 1, $noreg, 0, $noreg, killed renamable $rsi :: (store (s64) into %ir.out, align 16)
187+
MOV64mr renamable $r8, 1, $noreg, 8, $noreg, killed renamable $r9 :: (store (s64) into %ir.out + 8, basealign 16)
188+
MOV64mr renamable $r8, 1, $noreg, 16, $noreg, killed renamable $rax :: (store (s64) into %ir.out + 16, align 16)
189+
MOV64mr killed renamable $r8, 1, $noreg, 24, $noreg, killed renamable $rdx :: (store (s64) into %ir.out + 24, basealign 16)
190+
SEH_BeginEpilogue
191+
$rsp = frame-destroy ADD64ri32 $rsp, 8, implicit-def dead $eflags
192+
$rbx = frame-destroy POP64r implicit-def $rsp, implicit $rsp
193+
$rdi = frame-destroy POP64r implicit-def $rsp, implicit $rsp
194+
$rsi = frame-destroy POP64r implicit-def $rsp, implicit $rsp
195+
$r14 = frame-destroy POP64r implicit-def $rsp, implicit $rsp
196+
$r15 = frame-destroy POP64r implicit-def $rsp, implicit $rsp
197+
SEH_EndEpilogue
198+
RET64
199+
...

0 commit comments

Comments
 (0)