Skip to content

Commit a01a921

Browse files
authored
[ARM] Prevent stack argument overwrite during tail calls (#166492)
For tail-calls we want to re-use the caller stack-frame and potentially need to copy stack arguments. For large stack arguments, such as by-val structs, this can lead to overwriting incoming stack arguments when preparing outgoing ones by copying them. E.g., in cases like %"struct.s1" = type { [19 x i32] } define void @f0(ptr byval(%"struct.s1") %0, ptr %1) { tail call void @F1(ptr %1, ptr byval(%"struct.s1") %0) ret void } declare void @F1(ptr, ptr) that swap arguments, the last bytes of %0 are on the stack, followed by %1. To prepare the outgoing arguments, %0 needs to be copied and %1 needs to be loaded into r0. However, currently the copy of %0 overwrites the location of %1, resulting in loading garbage into r0. We fix that by forcing the load to the pointer stack argument to happen before the copy.
1 parent dbf77e4 commit a01a921

File tree

2 files changed

+105
-1
lines changed

2 files changed

+105
-1
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2510,9 +2510,44 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
25102510

25112511
if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
25122512
Chain = DAG.getStackArgumentTokenFactor(Chain);
2513-
if (ByValTempChain)
2513+
if (ByValTempChain) {
2514+
// In case of large byval copies, re-using the stackframe for tail-calls
2515+
// can lead to overwriting incoming arguments on the stack. Force
2516+
// loading these stack arguments before the copy to avoid that.
2517+
SmallVector<SDValue, 8> IncomingLoad;
2518+
for (unsigned I = 0; I < OutVals.size(); ++I) {
2519+
if (Outs[I].Flags.isByVal())
2520+
continue;
2521+
2522+
SDValue OutVal = OutVals[I];
2523+
LoadSDNode *OutLN = dyn_cast_or_null<LoadSDNode>(OutVal);
2524+
if (!OutLN)
2525+
continue;
2526+
2527+
FrameIndexSDNode *FIN =
2528+
dyn_cast_or_null<FrameIndexSDNode>(OutLN->getBasePtr());
2529+
if (!FIN)
2530+
continue;
2531+
2532+
if (!MFI.isFixedObjectIndex(FIN->getIndex()))
2533+
continue;
2534+
2535+
for (const CCValAssign &VA : ArgLocs) {
2536+
if (VA.isMemLoc())
2537+
IncomingLoad.push_back(OutVal.getValue(1));
2538+
}
2539+
}
2540+
2541+
// Update the chain to force loads for potentially clobbered argument
2542+
// loads to happen before the byval copy.
2543+
if (!IncomingLoad.empty()) {
2544+
IncomingLoad.push_back(Chain);
2545+
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, IncomingLoad);
2546+
}
2547+
25142548
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,
25152549
ByValTempChain);
2550+
}
25162551
AfterFormalArgLoads = true;
25172552
}
25182553

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
; RUN: llc -mtriple thumbv7em-apple-darwin -o - < %s | FileCheck %s
2+
3+
%"struct.s1" = type { [19 x i32] }
4+
5+
define void @f0(ptr byval(%"struct.s1") %0, ptr %1) #1 {
6+
; CHECK-LABEL: _f0: @ @f0
7+
; CHECK-NEXT: @ %bb.0:
8+
; CHECK-NEXT: sub sp, #16
9+
; CHECK-NEXT: push {r4, lr}
10+
; CHECK-NEXT: sub sp, #76
11+
; CHECK-NEXT: add.w r9, sp, #84
12+
; CHECK-NEXT: stm.w r9, {r0, r1, r2, r3}
13+
; CHECK-NEXT: mov r0, sp
14+
; CHECK-NEXT: add r1, sp, #84
15+
; CHECK-NEXT: movs r2, #76
16+
; CHECK-NEXT: mov r3, r0
17+
; CHECK-NEXT: LBB0_1: @ =>This Inner Loop Header: Depth=1
18+
; CHECK-NEXT: ldr r4, [r1], #4
19+
; CHECK-NEXT: subs r2, #4
20+
; CHECK-NEXT: str r4, [r3], #4
21+
; CHECK-NEXT: bne LBB0_1
22+
; CHECK-NEXT: @ %bb.2:
23+
; CHECK-NEXT: add.w r1, r0, #12
24+
; CHECK-NEXT: add r2, sp, #100
25+
; CHECK-NEXT: ldr r0, [sp, #160]
26+
; CHECK-NEXT: ldr r3, [r1], #4
27+
; CHECK-NEXT: str r3, [r2], #4
28+
; CHECK-NEXT: ldr r3, [r1], #4
29+
; CHECK-NEXT: str r3, [r2], #4
30+
; CHECK-NEXT: ldr r3, [r1], #4
31+
; CHECK-NEXT: str r3, [r2], #4
32+
; CHECK-NEXT: ldr r3, [r1], #4
33+
; CHECK-NEXT: str r3, [r2], #4
34+
; CHECK-NEXT: ldr r3, [r1], #4
35+
; CHECK-NEXT: str r3, [r2], #4
36+
; CHECK-NEXT: ldr r3, [r1], #4
37+
; CHECK-NEXT: str r3, [r2], #4
38+
; CHECK-NEXT: ldr r3, [r1], #4
39+
; CHECK-NEXT: str r3, [r2], #4
40+
; CHECK-NEXT: ldr r3, [r1], #4
41+
; CHECK-NEXT: str r3, [r2], #4
42+
; CHECK-NEXT: ldr r3, [r1], #4
43+
; CHECK-NEXT: str r3, [r2], #4
44+
; CHECK-NEXT: ldr r3, [r1], #4
45+
; CHECK-NEXT: str r3, [r2], #4
46+
; CHECK-NEXT: ldr r3, [r1], #4
47+
; CHECK-NEXT: str r3, [r2], #4
48+
; CHECK-NEXT: ldr r3, [r1], #4
49+
; CHECK-NEXT: str r3, [r2], #4
50+
; CHECK-NEXT: ldr r3, [r1], #4
51+
; CHECK-NEXT: str r3, [r2], #4
52+
; CHECK-NEXT: ldr r3, [r1], #4
53+
; CHECK-NEXT: str r3, [r2], #4
54+
; CHECK-NEXT: ldr r3, [r1], #4
55+
; CHECK-NEXT: str r3, [r2], #4
56+
; CHECK-NEXT: ldr r3, [r1], #4
57+
; CHECK-NEXT: str r3, [r2], #4
58+
; CHECK-NEXT: ldm.w sp, {r1, r2, r3}
59+
; CHECK-NEXT: add sp, #76
60+
; CHECK-NEXT: pop.w {r4, lr}
61+
; CHECK-NEXT: add sp, #16
62+
; CHECK-NEXT: b.w _f1
63+
tail call void @f1(ptr %1, ptr byval(%"struct.s1") %0)
64+
ret void
65+
}
66+
67+
declare void @f1(ptr, ptr)
68+
69+
attributes #1 = { nounwind "frame-pointes"="non-leaf" }

0 commit comments

Comments
 (0)