Skip to content

Commit e4340b3

Browse files
authored
pulley: Fix regalloc of return-call-indirect (bytecodealliance#10021)
This commit fixes an issue in the Pulley backend's `return_call_indirect` implementation. It brings Pulley in line with other backends by using a fixed, caller-saved register for the indirect call destination instead of possibly using a callee-saved register. If a callee-saved register is used, the jump destination is clobbered by the register restores that happen just before the return-call, so the jump no longer targets the correct location. This additionally required updating the Pulley ABI slightly. Previously all caller-saved registers were considered argument registers, meaning that there were no registers actually available to hold the jump destination. To handle this, I've decreased the number of argument registers by one so that a single caller-saved register remains available for the return-call-indirect destination.
1 parent cc8df84 commit e4340b3

File tree

5 files changed

+279
-267
lines changed

5 files changed

+279
-267
lines changed

cranelift/codegen/src/isa/pulley_shared/abi.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,8 +62,14 @@ where
6262
) -> CodegenResult<(u32, Option<usize>)> {
6363
// NB: make sure this method stays in sync with
6464
// `cranelift_pulley::interp::Vm::call`.
65+
//
66+
// In general we use the first half of all register banks as argument
67+
// passing registers because, well, why not for now. Currently the only
68+
// exception is x15 which is reserved as a single caller-saved register
69+
// not used for arguments. This is used in `Inst::ReturnIndirectCall` to hold
70+
// the location of where we're jumping to.
6571

66-
let x_end = 15;
72+
let x_end = 14;
6773
let f_end = 15;
6874
let v_end = 15;
6975

cranelift/codegen/src/isa/pulley_shared/inst/mod.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,17 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
200200
}
201201
}
202202
Inst::ReturnIndirectCall { info } => {
203-
collector.reg_use(&mut info.dest);
203+
// Use a fixed location of where to store the value to
204+
// return-call-to. Using a fixed location prevents this register
205+
// from being allocated to a callee-saved register which will get
206+
// clobbered during the register restores just before the
207+
// return-call.
208+
//
209+
// Also note that `x15` is specifically the last caller-saved
210+
// register and, at this time, the only non-argument caller-saved
211+
// register. This register allocation constraint is why it's not an
212+
// argument register.
213+
collector.reg_fixed_use(&mut info.dest, regs::x15());
204214

205215
for CallArgPair { vreg, preg } in &mut info.uses {
206216
collector.reg_fixed_use(vreg, *preg);

cranelift/filetests/filetests/isa/pulley32/call.clif

Lines changed: 107 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -129,52 +129,52 @@ block0:
129129
}
130130

131131
; VCode:
132-
; push_frame_save 48, {}
132+
; push_frame_save 64, {}
133133
; block0:
134-
; xzero x15
135-
; xstore64 OutgoingArg(0), x15 // flags = notrap aligned
136-
; xstore64 OutgoingArg(8), x15 // flags = notrap aligned
137-
; xstore64 OutgoingArg(16), x15 // flags = notrap aligned
138-
; xstore64 OutgoingArg(24), x15 // flags = notrap aligned
139-
; xstore64 OutgoingArg(32), x15 // flags = notrap aligned
140-
; xstore64 OutgoingArg(40), x15 // flags = notrap aligned
141-
; xmov x4, x15
142-
; xmov x5, x15
143-
; xmov x6, x15
144-
; xmov x7, x15
145-
; xmov x8, x15
146-
; xmov x9, x15
147-
; xmov x10, x15
148-
; xmov x11, x15
149-
; xmov x12, x15
150-
; xmov x13, x15
151-
; xmov x14, x15
152-
; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p15i), XReg(p15i), XReg(p15i), XReg(p15i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }, CallArgPair { vreg: p15i, preg: p15i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 }
153-
; pop_frame_restore 48, {}
134+
; xzero x14
135+
; xstore64 OutgoingArg(0), x14 // flags = notrap aligned
136+
; xstore64 OutgoingArg(8), x14 // flags = notrap aligned
137+
; xstore64 OutgoingArg(16), x14 // flags = notrap aligned
138+
; xstore64 OutgoingArg(24), x14 // flags = notrap aligned
139+
; xstore64 OutgoingArg(32), x14 // flags = notrap aligned
140+
; xstore64 OutgoingArg(40), x14 // flags = notrap aligned
141+
; xstore64 OutgoingArg(48), x14 // flags = notrap aligned
142+
; xmov x4, x14
143+
; xmov x5, x14
144+
; xmov x6, x14
145+
; xmov x7, x14
146+
; xmov x8, x14
147+
; xmov x9, x14
148+
; xmov x10, x14
149+
; xmov x11, x14
150+
; xmov x12, x14
151+
; xmov x13, x14
152+
; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p14i), XReg(p14i), XReg(p14i), XReg(p14i)] }, uses: [CallArgPair { vreg: p4i, preg: p4i }, CallArgPair { vreg: p5i, preg: p5i }, CallArgPair { vreg: p6i, preg: p6i }, CallArgPair { vreg: p7i, preg: p7i }, CallArgPair { vreg: p8i, preg: p8i }, CallArgPair { vreg: p9i, preg: p9i }, CallArgPair { vreg: p10i, preg: p10i }, CallArgPair { vreg: p11i, preg: p11i }, CallArgPair { vreg: p12i, preg: p12i }, CallArgPair { vreg: p13i, preg: p13i }, CallArgPair { vreg: p14i, preg: p14i }], defs: [], clobbers: PRegSet { bits: [65535, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 }
153+
; pop_frame_restore 64, {}
154154
; ret
155155
;
156156
; Disassembled:
157-
; push_frame_save 48,
158-
; xzero x15
159-
; xstore64le_offset8 sp, 0, x15
160-
; xstore64le_offset8 sp, 8, x15
161-
; xstore64le_offset8 sp, 16, x15
162-
; xstore64le_offset8 sp, 24, x15
163-
; xstore64le_offset8 sp, 32, x15
164-
; xstore64le_offset8 sp, 40, x15
165-
; xmov x4, x15
166-
; xmov x5, x15
167-
; xmov x6, x15
168-
; xmov x7, x15
169-
; xmov x8, x15
170-
; xmov x9, x15
171-
; xmov x10, x15
172-
; xmov x11, x15
173-
; xmov x12, x15
174-
; xmov x13, x15
175-
; xmov x14, x15
176-
; call4 x15, x15, x15, x15, 0x0 // target = 0x40
177-
; pop_frame_restore 48,
157+
; push_frame_save 64,
158+
; xzero x14
159+
; xstore64le_offset8 sp, 0, x14
160+
; xstore64le_offset8 sp, 8, x14
161+
; xstore64le_offset8 sp, 16, x14
162+
; xstore64le_offset8 sp, 24, x14
163+
; xstore64le_offset8 sp, 32, x14
164+
; xstore64le_offset8 sp, 40, x14
165+
; xstore64le_offset8 sp, 48, x14
166+
; xmov x4, x14
167+
; xmov x5, x14
168+
; xmov x6, x14
169+
; xmov x7, x14
170+
; xmov x8, x14
171+
; xmov x9, x14
172+
; xmov x10, x14
173+
; xmov x11, x14
174+
; xmov x12, x14
175+
; xmov x13, x14
176+
; call4 x14, x14, x14, x14, 0x0 // target = 0x41
177+
; pop_frame_restore 64,
178178
; ret
179179

180180
function %colocated_stack_rets() -> i64 {
@@ -214,82 +214,80 @@ block0:
214214
}
215215

216216
; VCode:
217-
; push_frame_save 112, {x17, x18, x20, x21, x22, x23, x29}
217+
; push_frame_save 112, {x16, x18, x19, x20, x21, x22, x24, x28}
218218
; block0:
219219
; x12 = load_addr OutgoingArg(0)
220-
; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }, CallRetPair { vreg: Writable { reg: p15i }, preg: p15i }], clobbers: PRegSet { bits: [0, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 }
221-
; xmov x20, x13
222-
; xmov x22, x11
223-
; x29 = xload64 OutgoingArg(0) // flags = notrap aligned
224-
; x11 = xload64 OutgoingArg(8) // flags = notrap aligned
225-
; x13 = xload64 OutgoingArg(16) // flags = notrap aligned
226-
; x21 = xload64 OutgoingArg(24) // flags = notrap aligned
227-
; x23 = xload64 OutgoingArg(32) // flags = notrap aligned
228-
; xadd64 x18, x0, x1
229-
; xadd64 x17, x2, x3
230-
; xadd64 x5, x4, x5
231-
; xadd64 x6, x6, x7
232-
; xadd64 x7, x8, x9
233-
; xmov x0, x22
234-
; xadd64 x4, x10, x0
235-
; xmov x10, x20
236-
; xadd64 x8, x12, x10
237-
; xadd64 x14, x14, x15
238-
; xadd64 x15, x29, x11
239-
; xadd64 x13, x11, x13
240-
; xadd64 x0, x21, x23
241-
; xadd64 x1, x18, x17
242-
; xadd64 x2, x5, x6
243-
; xadd64 x3, x7, x4
244-
; xadd64 x14, x8, x14
245-
; xadd64 x13, x15, x13
246-
; xadd64 x15, x0, x0
247-
; xadd64 x0, x1, x2
220+
; call CallInfo { dest: PulleyCall { name: TestCase(%g), args: [XReg(p12i)] }, uses: [], defs: [CallRetPair { vreg: Writable { reg: p0i }, preg: p0i }, CallRetPair { vreg: Writable { reg: p1i }, preg: p1i }, CallRetPair { vreg: Writable { reg: p2i }, preg: p2i }, CallRetPair { vreg: Writable { reg: p3i }, preg: p3i }, CallRetPair { vreg: Writable { reg: p4i }, preg: p4i }, CallRetPair { vreg: Writable { reg: p5i }, preg: p5i }, CallRetPair { vreg: Writable { reg: p6i }, preg: p6i }, CallRetPair { vreg: Writable { reg: p7i }, preg: p7i }, CallRetPair { vreg: Writable { reg: p8i }, preg: p8i }, CallRetPair { vreg: Writable { reg: p9i }, preg: p9i }, CallRetPair { vreg: Writable { reg: p10i }, preg: p10i }, CallRetPair { vreg: Writable { reg: p11i }, preg: p11i }, CallRetPair { vreg: Writable { reg: p12i }, preg: p12i }, CallRetPair { vreg: Writable { reg: p13i }, preg: p13i }, CallRetPair { vreg: Writable { reg: p14i }, preg: p14i }], clobbers: PRegSet { bits: [32768, 65535, 4294967295, 0] }, callee_conv: Fast, caller_conv: Fast, callee_pop_size: 0 }
221+
; xmov x21, x12
222+
; x28 = xload64 OutgoingArg(0) // flags = notrap aligned
223+
; x16 = xload64 OutgoingArg(8) // flags = notrap aligned
224+
; x12 = xload64 OutgoingArg(16) // flags = notrap aligned
225+
; x15 = xload64 OutgoingArg(24) // flags = notrap aligned
226+
; x22 = xload64 OutgoingArg(32) // flags = notrap aligned
227+
; x24 = xload64 OutgoingArg(40) // flags = notrap aligned
228+
; xadd64 x20, x0, x1
229+
; xadd64 x19, x2, x3
230+
; xadd64 x18, x4, x5
231+
; xadd64 x4, x6, x7
232+
; xadd64 x5, x8, x9
233+
; xadd64 x2, x10, x11
234+
; xmov x11, x21
235+
; xadd64 x3, x11, x13
236+
; xadd64 x14, x14, x28
237+
; xadd64 x0, x16, x12
238+
; xadd64 x15, x12, x15
239+
; xadd64 x1, x22, x24
240+
; xadd64 x6, x20, x19
241+
; xadd64 x4, x18, x4
242+
; xadd64 x2, x5, x2
248243
; xadd64 x14, x3, x14
249-
; xadd64 x13, x13, x15
250-
; xadd64 x14, x0, x14
251-
; xadd64 x13, x13, x13
252-
; xadd64 x0, x14, x13
253-
; pop_frame_restore 112, {x17, x18, x20, x21, x22, x23, x29}
244+
; xadd64 x15, x0, x15
245+
; xadd64 x0, x1, x1
246+
; xadd64 x1, x6, x4
247+
; xadd64 x14, x2, x14
248+
; xadd64 x15, x15, x0
249+
; xadd64 x14, x1, x14
250+
; xadd64 x15, x15, x15
251+
; xadd64 x0, x14, x15
252+
; pop_frame_restore 112, {x16, x18, x19, x20, x21, x22, x24, x28}
254253
; ret
255254
;
256255
; Disassembled:
257-
; push_frame_save 112, x17, x18, x20, x21, x22, x23, x29
256+
; push_frame_save 112, x16, x18, x19, x20, x21, x22, x24, x28
258257
; xmov x12, sp
259258
; call1 x12, 0x0 // target = 0x8
260-
; xmov x20, x13
261-
; xmov x22, x11
262-
; xload64le_offset8 x29, sp, 0
263-
; xload64le_offset8 x11, sp, 8
264-
; xload64le_offset8 x13, sp, 16
265-
; xload64le_offset8 x21, sp, 24
266-
; xload64le_offset8 x23, sp, 32
267-
; xadd64 x18, x0, x1
268-
; xadd64 x17, x2, x3
269-
; xadd64 x5, x4, x5
270-
; xadd64 x6, x6, x7
271-
; xadd64 x7, x8, x9
272-
; xmov x0, x22
273-
; xadd64 x4, x10, x0
274-
; xmov x10, x20
275-
; xadd64 x8, x12, x10
276-
; xadd64 x14, x14, x15
277-
; xadd64 x15, x29, x11
278-
; xadd64 x13, x11, x13
279-
; xadd64 x0, x21, x23
280-
; xadd64 x1, x18, x17
281-
; xadd64 x2, x5, x6
282-
; xadd64 x3, x7, x4
283-
; xadd64 x14, x8, x14
284-
; xadd64 x13, x15, x13
285-
; xadd64 x15, x0, x0
286-
; xadd64 x0, x1, x2
259+
; xmov x21, x12
260+
; xload64le_offset8 x28, sp, 0
261+
; xload64le_offset8 x16, sp, 8
262+
; xload64le_offset8 x12, sp, 16
263+
; xload64le_offset8 x15, sp, 24
264+
; xload64le_offset8 x22, sp, 32
265+
; xload64le_offset8 x24, sp, 40
266+
; xadd64 x20, x0, x1
267+
; xadd64 x19, x2, x3
268+
; xadd64 x18, x4, x5
269+
; xadd64 x4, x6, x7
270+
; xadd64 x5, x8, x9
271+
; xadd64 x2, x10, x11
272+
; xmov x11, x21
273+
; xadd64 x3, x11, x13
274+
; xadd64 x14, x14, x28
275+
; xadd64 x0, x16, x12
276+
; xadd64 x15, x12, x15
277+
; xadd64 x1, x22, x24
278+
; xadd64 x6, x20, x19
279+
; xadd64 x4, x18, x4
280+
; xadd64 x2, x5, x2
287281
; xadd64 x14, x3, x14
288-
; xadd64 x13, x13, x15
289-
; xadd64 x14, x0, x14
290-
; xadd64 x13, x13, x13
291-
; xadd64 x0, x14, x13
292-
; pop_frame_restore 112, x17, x18, x20, x21, x22, x23, x29
282+
; xadd64 x15, x0, x15
283+
; xadd64 x0, x1, x1
284+
; xadd64 x1, x6, x4
285+
; xadd64 x14, x2, x14
286+
; xadd64 x15, x15, x0
287+
; xadd64 x14, x1, x14
288+
; xadd64 x15, x15, x15
289+
; xadd64 x0, x14, x15
290+
; pop_frame_restore 112, x16, x18, x19, x20, x21, x22, x24, x28
293291
; ret
294292

295293
function %call_indirect(i32) -> i64 {

0 commit comments

Comments
 (0)