Skip to content

Commit f173d48

Browse files
committed
Fix for issue #163015 where corruption can occur with passing parameters on the stack when under register pressure.
1 parent 2121bda commit f173d48

File tree

5 files changed

+176
-30
lines changed

5 files changed

+176
-30
lines changed

llvm/lib/Target/AVR/AVRExpandPseudoInsts.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1335,6 +1335,17 @@ bool AVRExpandPseudo::expand<AVR::STDWPtrQRr>(Block &MBB, BlockIt MBBI) {
13351335
return true;
13361336
}
13371337

1338+
// The instructions STDSPQRr and STDWSPQRr are used to store to the stack
1339+
// frame. The most accurate implementation would be to load the SP into
1340+
// a temporary pointer variable and then STDPtrQRr. However for efficiency,
1341+
// we assume that R29R28 contains the current call frame pointer.
1342+
// However in the PEI pass we sometimes rewrite a ADJCALLSTACKDOWN pseudo,
1343+
// plus one or more STDSPQRr/STDWSPQRr pseudo instructions to use Z for a
1344+
// stack adjustment then as a base pointer. To avoid corruption, we thus
1345+
// specify special classes of registers, like GPR8 and DREGS, but with
1346+
// the Z register removed, as the source/input to these instructions.
1347+
// (Note: R29R28 are already reserved by the ABI so would never be used
1348+
// as source registers.)
13381349
template <>
13391350
bool AVRExpandPseudo::expand<AVR::STDSPQRr>(Block &MBB, BlockIt MBBI) {
13401351
MachineInstr &MI = *MBBI;
@@ -1354,6 +1365,7 @@ bool AVRExpandPseudo::expand<AVR::STDSPQRr>(Block &MBB, BlockIt MBBI) {
13541365
return true;
13551366
}
13561367

1368+
// See the comment on STDSPQRr.
13571369
template <>
13581370
bool AVRExpandPseudo::expand<AVR::STDWSPQRr>(Block &MBB, BlockIt MBBI) {
13591371
MachineInstr &MI = *MBBI;

llvm/lib/Target/AVR/AVRInstrInfo.td

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1506,12 +1506,14 @@ def FRMIDX : Pseudo<(outs DLDREGS:$dst), (ins DLDREGS:$src, i16imm:$src2),
15061506

15071507
// This pseudo is either converted to a regular store or a push which clobbers
15081508
// SP.
1509-
def STDSPQRr : StorePseudo<(outs), (ins memspi:$dst, GPR8:$src),
1509+
let Defs = [SP], Uses = [SP], hasSideEffects = 0 in
1510+
def STDSPQRr : StorePseudo<(outs), (ins memspi:$dst, GPR8NOZ:$src),
15101511
"stdstk\t$dst, $src", [(store i8:$src, addr:$dst)]>;
15111512

15121513
// This pseudo is either converted to a regular store or a push which clobbers
15131514
// SP.
1514-
def STDWSPQRr : StorePseudo<(outs), (ins memspi:$dt, DREGS:$src),
1515+
let Defs = [SP], Uses = [SP], hasSideEffects = 0 in
1516+
def STDWSPQRr : StorePseudo<(outs), (ins memspi:$dt, DREGSNOZ:$src),
15151517
"stdwstk\t$dt, $src", [(store i16:$src, addr:$dt)]>;
15161518

15171519
// SP read/write pseudos.

llvm/lib/Target/AVR/AVRRegisterInfo.td

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,31 @@ def PTRDISPREGS : RegisterClass<"AVR", [i16], 8, (add R31R30, R29R28), ptr>;
211211
// model this using a register class containing only the Z register.
212212
def ZREG : RegisterClass<"AVR", [i16], 8, (add R31R30)>;
213213

214+
// general registers excluding Z register lo/hi, these are the only
215+
// registers that are always safe for STDSPQr instructions
216+
def GPR8NOZ : RegisterClass<"AVR", [i8], 8,
217+
(// Return value and argument registers.
218+
add R24, R25, R18, R19, R20, R21, R22, R23,
219+
// Scratch registers.
220+
R26, R27,
221+
// Callee saved registers.
222+
R28, R29, R17, R16, R15, R14, R13, R12, R11, R10,
223+
R9, R8, R7, R6, R5, R4, R3, R2, R0, R1)>;
224+
225+
// 16-bit pair register class excluding Z register lo/hi, these are the only
226+
// registers that are always safe for STDWSPQr instructions
227+
def DREGSNOZ : RegisterClass<"AVR", [i16], 8,
228+
(// Return value and arguments.
229+
add R25R24, R19R18, R21R20, R23R22,
230+
// Scratch registers.
231+
R27R26,
232+
// Callee saved registers.
233+
R29R28, R17R16, R15R14, R13R12, R11R10, R9R8,
234+
R7R6, R5R4, R3R2, R1R0,
235+
// Pseudo regs for unaligned 16-bits
236+
R26R25, R24R23, R22R21, R20R19, R18R17, R16R15,
237+
R14R13, R12R11, R10R9)>;
238+
214239
// Register class used for the stack read pseudo instruction.
215240
def GPRSP : RegisterClass<"AVR", [i16], 8, (add SP)>;
216241

llvm/test/CodeGen/AVR/dynalloca.ll

Lines changed: 101 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,34 +1,101 @@
11
; RUN: llc < %s -mtriple=avr | FileCheck %s
2-
32
declare void @foo(ptr, ptr, ptr)
43

54
define void @test1(i16 %x) {
65
; CHECK-LABEL: test1:
6+
7+
; r28/r29 are preserved/callee saved per the avr gcc ABI
8+
; CHECK: push r28
9+
; CHECK: push r29
10+
711
; Frame setup, with frame pointer
12+
; This includes the fixed size alloca (but not the dynamic alloca)
13+
; the fixed size alloca was 8 * i16, which is 16 bytes allocated from the stack
814
; CHECK: in r28, 61
915
; CHECK: in r29, 62
16+
; CHECK: sbiw r28, 16
17+
; CHECK: out 62, r29
1018
; CHECK: out 61, r28
19+
20+
; CHECK: lsl r24
21+
; CHECK: rol r25
22+
1123
; allocate first dynalloca
12-
; CHECK: in {{.*}}, 61
13-
; CHECK: in {{.*}}, 62
14-
; CHECK: sub
15-
; CHECK: sbc
24+
; note that dynalloca is allocated from the stack
25+
; but does not move the frame pointer (Y+1) down
26+
; CHECK: in [[REG1:r[0-9]+]], 61
27+
; CHECK: in [[REG2:r[0-9]+]], 62
28+
; CHECK: sub [[REG1]], r24
29+
; CHECK: sbc [[REG2]], r25
30+
; CHECK: in r0, 63
31+
; CHECK-NEXT: cli
32+
; CHECK-NEXT: out 62, [[REG2]]
33+
; CHECK-NEXT: out 63, r0
34+
; CHECK-NEXT: out 61, [[REG1]]
35+
36+
; allocate second dynalloca
37+
; CHECK: in [[REG3:r[0-9]+]], 61
38+
; CHECK: in [[REG4:r[0-9]+]], 62
39+
; CHECK: sub [[REG3]], r24
40+
; CHECK: sbc [[REG4]], r25
1641
; CHECK: in r0, 63
1742
; CHECK-NEXT: cli
18-
; CHECK-NEXT: out 62, {{.*}}
43+
; CHECK-NEXT: out 62, [[REG4]]
1944
; CHECK-NEXT: out 63, r0
20-
; CHECK-NEXT: out 61, {{.*}}
45+
; CHECK-NEXT: out 61, [[REG3]]
46+
2147
; Test writes
22-
; CHECK: std Z+13, {{.*}}
23-
; CHECK: std Z+12, {{.*}}
24-
; CHECK: std Z+7, {{.*}}
48+
; first check that writes to %a
49+
; (which points to the results of the static alloca)
50+
; go to the standard function stack frame, pointed
51+
; to by Y+1 (not the buffers created by dynamic alloca)
52+
; note that we use GEP with an offset of 2 * i16 which is 4 bytes
53+
; hence the two data bytes are stored into Y+5 and Y+6
54+
; CHECK: ldi [[TMP1:r[0-9]+]], 3
55+
; CHECK: ldi [[TMP2:r[0-9]+]], 0
56+
; CHECK: std Y+6, [[TMP2]]
57+
; CHECK: std Y+5, [[TMP1]]
58+
59+
60+
; CHECK: ldi [[TMP3:r[0-9]+]], 4
61+
; CHECK: ldi [[TMP4:r[0-9]+]], 0
62+
; CHECK: mov r30, [[REG1]]
63+
; CHECK: mov r31, [[REG2]]
64+
; here the GEP instruction is an offset of 6 * i16
65+
; making the offset Z+13
66+
; CHECK: std Z+13, [[TMP4]]
67+
; CHECK: std Z+12, [[TMP3]]
68+
69+
; CHECK: ldi [[TMP5:r[0-9]+]], 44
70+
71+
; CHECK: mov r30, [[REG3]]
72+
; CHECK: mov r31, [[REG4]]
73+
74+
; here GEP is 7 * i8
75+
; CHECK: std Z+7, [[TMP5]]
76+
77+
; check calling foo with the pointer to the static alloca buffer,
78+
; which is Y+1 by avr gcc abi conventions
79+
; CHECK: mov r24, r28
80+
; CHECK: mov r25, r29
81+
; CHECK: adiw r24, 1
82+
; CHECK: rcall foo
83+
84+
85+
86+
2587
; CHECK-NOT: std
2688
; Test SP restore
89+
; CHECK: adiw r28, 16
2790
; CHECK: in r0, 63
2891
; CHECK-NEXT: cli
2992
; CHECK-NEXT: out 62, r29
3093
; CHECK-NEXT: out 63, r0
3194
; CHECK-NEXT: out 61, r28
95+
96+
; CHECK: pop r29
97+
; CHECK: pop r28
98+
3299
%a = alloca [8 x i16]
33100
%vla = alloca i16, i16 %x
34101
%add = shl nsw i16 %x, 1
@@ -54,27 +121,27 @@ define void @dynalloca2(i16 %x) {
54121
; CHECK: in r28, 61
55122
; CHECK: in r29, 62
56123
; Allocate stack space for call
57-
; CHECK: in {{.*}}, 61
58-
; CHECK: in {{.*}}, 62
59-
; CHECK: subi
60-
; CHECK: sbci
124+
; CHECK: in r30, 61
125+
; CHECK: in r31, 62
126+
; CHECK: subi r30, 8
127+
; CHECK: sbci r31, 0
61128
; CHECK: in r0, 63
62129
; CHECK-NEXT: cli
63-
; CHECK-NEXT: out 62, {{.*}}
130+
; CHECK-NEXT: out 62, r31
64131
; CHECK-NEXT: out 63, r0
65-
; CHECK-NEXT: out 61, {{.*}}
132+
; CHECK-NEXT: out 61, r30
66133
; Store values on the stack
67-
; CHECK: ldi r20, 0
68-
; CHECK: ldi r21, 0
69-
; CHECK: std Z+8, r21
70-
; CHECK: std Z+7, r20
71-
; CHECK: std Z+6, r21
72-
; CHECK: std Z+5, r20
73-
; CHECK: std Z+4, r21
74-
; CHECK: std Z+3, r20
75-
; CHECK: std Z+2, r21
76-
; CHECK: std Z+1, r20
77-
; CHECK: call
134+
; CHECK: ldi [[REG1:r[0-9]+]], 0
135+
; CHECK: ldi [[REG2:r[0-9]+]], 0
136+
; CHECK: std Z+8, [[REG2]]
137+
; CHECK: std Z+7, [[REG1]]
138+
; CHECK: std Z+6, [[REG2]]
139+
; CHECK: std Z+5, [[REG1]]
140+
; CHECK: std Z+4, [[REG2]]
141+
; CHECK: std Z+3, [[REG1]]
142+
; CHECK: std Z+2, [[REG2]]
143+
; CHECK: std Z+1, [[REG1]]
144+
; CHECK: rcall foo2
78145
; Call frame restore
79146
; CHECK-NEXT: in r30, 61
80147
; CHECK-NEXT: in r31, 62
@@ -84,7 +151,13 @@ define void @dynalloca2(i16 %x) {
84151
; CHECK-NEXT: out 62, r31
85152
; CHECK-NEXT: out 63, r0
86153
; CHECK-NEXT: out 61, r30
87-
; SP restore
154+
; SP restore - note this might
155+
; seem redundant direction after the ADJCALLSTACKUP
156+
; I think the logic is both should be there because they come from
157+
; different mechanisms, although clearly an optimisation you could do is
158+
; discard the ADJCALLSTACKUP if you know the stack pointer is about to be
159+
; restored back to the value from the start of the function (in r28/r29)
160+
; straight afterwards
88161
; CHECK: in r0, 63
89162
; CHECK-NEXT: cli
90163
; CHECK-NEXT: out 62, r29
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
; RUN: llc < %s -mtriple=avr | FileCheck -v -dump-input always %s
2+
3+
%Tstats = type <{ i8, i8 }>
4+
@ui1 = protected local_unnamed_addr global i64 zeroinitializer, align 8
5+
@ui2 = protected local_unnamed_addr global i64 zeroinitializer, align 8
6+
@failed = private unnamed_addr addrspace(1) constant [12 x i8] c"test failed\00"
7+
@stats = external protected global %Tstats, align 1
8+
9+
; CHECK-LABEL: main:
10+
define protected noundef i32 @main(i32 %0, ptr nocapture readnone %1) local_unnamed_addr addrspace(1) #0 {
11+
entry:
12+
store i64 94, ptr @ui1, align 8
13+
store i64 53, ptr @ui2, align 8
14+
tail call addrspace(1) void @reportFailureWithDetails(i16 ptrtoint (ptr addrspace(1) @failed to i16), i16 11, i8 2, i16 32, ptr nocapture nonnull swiftself dereferenceable(2) @stats)
15+
%11 = load i64, ptr @ui1, align 8
16+
%12 = load i64, ptr @ui2, align 8
17+
18+
; COM: CHECK: call __udivdi3
19+
%15 = udiv i64 %11, %12
20+
21+
; look for the buggy pattern where r30/r31 are being clobbered, corrupting the stack pointer
22+
; CHECK-NOT: std Z+{{[1-9]+}}, r30
23+
; CHECK-NOT: std Z+{{[1-9]+}}, r31
24+
25+
; CHECK: call expect
26+
tail call addrspace(1) void @expect(i64 %15, i64 1, i16 ptrtoint (ptr addrspace(1) @failed to i16), i16 11, i8 2, i16 33)
27+
28+
; CHECK: ret
29+
ret i32 0
30+
}
31+
32+
declare protected void @expect(i64, i64, i16, i16, i8, i16) local_unnamed_addr addrspace(1) #0
33+
declare protected void @reportFailureWithDetails(i16, i16, i8, i16, ptr nocapture swiftself dereferenceable(2)) local_unnamed_addr addrspace(1) #0
34+
attributes #0 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" }

0 commit comments

Comments
 (0)