Skip to content

Commit f80e08a

Browse files
committed
[ARM] Prevent stack argument overwrite during tail-call
For tail-calls we want to re-use the caller stack-frame and potentially need to copy stack arguments. For large stack arguments, such as by-val structs, this can lead to overwriting incoming stack arguments when preparing outgoing ones by copying them. E.g., in cases like %"struct.s1" = type { [19 x i32] } define void @f0(ptr byval(%"struct.s1") %0, ptr %1) { tail call void @F1(ptr %1, ptr byval(%"struct.s1") %0) ret void } declare void @F1(ptr, ptr) that swap arguments, the last bytes of %0 are on the stack, followed by %1. To prepare the outgoing arguments, %0 needs to be copied and %1 needs to be loaded into r0. However, currently the copy of %0 overwrites the location of %1, resulting in loading garbage into r0. We fix that by forcing the load to the pointer stack argument to happen before the copy.
1 parent 4f96f6f commit f80e08a

File tree

2 files changed

+70
-35
lines changed

2 files changed

+70
-35
lines changed

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2510,9 +2510,44 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
25102510

25112511
if (isTailCall && VA.isMemLoc() && !AfterFormalArgLoads) {
25122512
Chain = DAG.getStackArgumentTokenFactor(Chain);
2513-
if (ByValTempChain)
2513+
if (ByValTempChain) {
2514+
// In case of large byval copies, re-using the stackframe for tail-calls
2515+
// can lead to overwriting incoming arguments on the stack. Force
2516+
// loading these stack arguments before the copy to avoid that.
2517+
SmallVector<SDValue, 8> IncomingLoad;
2518+
for (unsigned I = 0; I < OutVals.size(); ++I) {
2519+
if (Outs[I].Flags.isByVal())
2520+
continue;
2521+
2522+
SDValue OutVal = OutVals[I];
2523+
LoadSDNode *OutLN = dyn_cast_or_null<LoadSDNode>(OutVal);
2524+
if (!OutLN)
2525+
continue;
2526+
2527+
FrameIndexSDNode *FIN =
2528+
dyn_cast_or_null<FrameIndexSDNode>(OutLN->getBasePtr());
2529+
if (!FIN)
2530+
continue;
2531+
2532+
if (!MFI.isFixedObjectIndex(FIN->getIndex()))
2533+
continue;
2534+
2535+
for (const CCValAssign &VA : ArgLocs) {
2536+
if (VA.isMemLoc())
2537+
IncomingLoad.push_back(OutVal.getValue(1));
2538+
}
2539+
}
2540+
2541+
// Update the chain to force loads for potentially clobbered argument
2542+
// loads to happen before the byval copy.
2543+
if (!IncomingLoad.empty()) {
2544+
IncomingLoad.push_back(Chain);
2545+
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, IncomingLoad);
2546+
}
2547+
25142548
Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain,
25152549
ByValTempChain);
2550+
}
25162551
AfterFormalArgLoads = true;
25172552
}
25182553

llvm/test/CodeGen/ARM/byval_struct_copy_tailcall.ll

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -21,41 +21,41 @@ define void @f0(ptr byval(%"struct.s1") %0, ptr %1) #1 {
2121
; CHECK-NEXT: bne LBB0_1
2222
; CHECK-NEXT: @ %bb.2:
2323
; CHECK-NEXT: add.w r1, r0, #12
24-
; CHECK-NEXT: add r0, sp, #100
25-
; CHECK-NEXT: ldr r2, [r1], #4
26-
; CHECK-NEXT: str r2, [r0], #4
27-
; CHECK-NEXT: ldr r2, [r1], #4
28-
; CHECK-NEXT: str r2, [r0], #4
29-
; CHECK-NEXT: ldr r2, [r1], #4
30-
; CHECK-NEXT: str r2, [r0], #4
31-
; CHECK-NEXT: ldr r2, [r1], #4
32-
; CHECK-NEXT: str r2, [r0], #4
33-
; CHECK-NEXT: ldr r2, [r1], #4
34-
; CHECK-NEXT: str r2, [r0], #4
35-
; CHECK-NEXT: ldr r2, [r1], #4
36-
; CHECK-NEXT: str r2, [r0], #4
37-
; CHECK-NEXT: ldr r2, [r1], #4
38-
; CHECK-NEXT: str r2, [r0], #4
39-
; CHECK-NEXT: ldr r2, [r1], #4
40-
; CHECK-NEXT: str r2, [r0], #4
41-
; CHECK-NEXT: ldr r2, [r1], #4
42-
; CHECK-NEXT: str r2, [r0], #4
43-
; CHECK-NEXT: ldr r2, [r1], #4
44-
; CHECK-NEXT: str r2, [r0], #4
45-
; CHECK-NEXT: ldr r2, [r1], #4
46-
; CHECK-NEXT: str r2, [r0], #4
47-
; CHECK-NEXT: ldr r2, [r1], #4
48-
; CHECK-NEXT: str r2, [r0], #4
49-
; CHECK-NEXT: ldr r2, [r1], #4
50-
; CHECK-NEXT: str r2, [r0], #4
51-
; CHECK-NEXT: ldr r2, [r1], #4
52-
; CHECK-NEXT: str r2, [r0], #4
53-
; CHECK-NEXT: ldr r2, [r1], #4
54-
; CHECK-NEXT: str r2, [r0], #4
55-
; CHECK-NEXT: ldr r2, [r1], #4
56-
; CHECK-NEXT: str r2, [r0], #4
57-
; CHECK-NEXT: ldm.w sp, {r1, r2, r3}
24+
; CHECK-NEXT: add r2, sp, #100
5825
; CHECK-NEXT: ldr r0, [sp, #160]
26+
; CHECK-NEXT: ldr r3, [r1], #4
27+
; CHECK-NEXT: str r3, [r2], #4
28+
; CHECK-NEXT: ldr r3, [r1], #4
29+
; CHECK-NEXT: str r3, [r2], #4
30+
; CHECK-NEXT: ldr r3, [r1], #4
31+
; CHECK-NEXT: str r3, [r2], #4
32+
; CHECK-NEXT: ldr r3, [r1], #4
33+
; CHECK-NEXT: str r3, [r2], #4
34+
; CHECK-NEXT: ldr r3, [r1], #4
35+
; CHECK-NEXT: str r3, [r2], #4
36+
; CHECK-NEXT: ldr r3, [r1], #4
37+
; CHECK-NEXT: str r3, [r2], #4
38+
; CHECK-NEXT: ldr r3, [r1], #4
39+
; CHECK-NEXT: str r3, [r2], #4
40+
; CHECK-NEXT: ldr r3, [r1], #4
41+
; CHECK-NEXT: str r3, [r2], #4
42+
; CHECK-NEXT: ldr r3, [r1], #4
43+
; CHECK-NEXT: str r3, [r2], #4
44+
; CHECK-NEXT: ldr r3, [r1], #4
45+
; CHECK-NEXT: str r3, [r2], #4
46+
; CHECK-NEXT: ldr r3, [r1], #4
47+
; CHECK-NEXT: str r3, [r2], #4
48+
; CHECK-NEXT: ldr r3, [r1], #4
49+
; CHECK-NEXT: str r3, [r2], #4
50+
; CHECK-NEXT: ldr r3, [r1], #4
51+
; CHECK-NEXT: str r3, [r2], #4
52+
; CHECK-NEXT: ldr r3, [r1], #4
53+
; CHECK-NEXT: str r3, [r2], #4
54+
; CHECK-NEXT: ldr r3, [r1], #4
55+
; CHECK-NEXT: str r3, [r2], #4
56+
; CHECK-NEXT: ldr r3, [r1], #4
57+
; CHECK-NEXT: str r3, [r2], #4
58+
; CHECK-NEXT: ldm.w sp, {r1, r2, r3}
5959
; CHECK-NEXT: add sp, #76
6060
; CHECK-NEXT: pop.w {r4, lr}
6161
; CHECK-NEXT: add sp, #16

0 commit comments

Comments
 (0)