Skip to content

Commit 8001451

Browse files
authored
s390x: Remove return-value instructions after calls at callsites (#10531)
This is a follow-on to #10502 implementing the same logic for s390x. Like other platforms, we now no longer emit any instructions handling return values after the call instruction; instead, everything is done within a pseudo Call instruction. Unlike other platforms, this also has to handle vector lane swapping when calling between ABIs that mix BE and LE lane orders. The pseudo Call instruction needs enough type information to make this choice, therefore I had to add a type field to the RetLocation::Reg case (the ::Stack case already had one). All other changes are contained within the s390x back-end. To simplify the implementation, the patch also implements a number of clean-ups: - Introduce a MemArg::SpillOffset variant - Remove the (unnecessary on this platform) island code size check - Merge the CallInd instructions into the base Call instructions, using a new CallInstDest type to carry the call destination
1 parent b772c31 commit 8001451

File tree

21 files changed

+538
-542
lines changed

21 files changed

+538
-542
lines changed

cranelift/codegen/src/isa/aarch64/inst/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -841,7 +841,7 @@ fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
841841
}
842842
for CallRetPair { vreg, location } in defs {
843843
match location {
844-
RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg),
844+
RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
845845
RetLocation::Stack(..) => collector.any_def(vreg),
846846
}
847847
}
@@ -857,7 +857,7 @@ fn aarch64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
857857
}
858858
for CallRetPair { vreg, location } in defs {
859859
match location {
860-
RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg),
860+
RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
861861
RetLocation::Stack(..) => collector.any_def(vreg),
862862
}
863863
}

cranelift/codegen/src/isa/pulley_shared/inst/mod.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -171,7 +171,7 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
171171
}
172172
for CallRetPair { vreg, location } in defs {
173173
match location {
174-
RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg),
174+
RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
175175
RetLocation::Stack(..) => collector.any_def(vreg),
176176
}
177177
}
@@ -184,7 +184,7 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
184184
}
185185
for CallRetPair { vreg, location } in defs {
186186
match location {
187-
RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg),
187+
RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
188188
RetLocation::Stack(..) => collector.any_def(vreg),
189189
}
190190
}
@@ -198,7 +198,7 @@ fn pulley_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
198198
}
199199
for CallRetPair { vreg, location } in defs {
200200
match location {
201-
RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg),
201+
RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
202202
RetLocation::Stack(..) => collector.any_def(vreg),
203203
}
204204
}

cranelift/codegen/src/isa/riscv64/inst/mod.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,7 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
337337
}
338338
for CallRetPair { vreg, location } in defs {
339339
match location {
340-
RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg),
340+
RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
341341
RetLocation::Stack(..) => collector.any_def(vreg),
342342
}
343343
}
@@ -353,7 +353,7 @@ fn riscv64_get_operands(inst: &mut Inst, collector: &mut impl OperandVisitor) {
353353
}
354354
for CallRetPair { vreg, location } in defs {
355355
match location {
356-
RetLocation::Reg(preg) => collector.reg_fixed_def(vreg, *preg),
356+
RetLocation::Reg(preg, ..) => collector.reg_fixed_def(vreg, *preg),
357357
RetLocation::Stack(..) => collector.any_def(vreg),
358358
}
359359
}

cranelift/codegen/src/isa/s390x/abi.rs

Lines changed: 119 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,16 @@ impl Into<MemArg> for StackAMode {
264264
}
265265
}
266266

267+
/// Lane order to be used for a given calling convention.
268+
impl From<isa::CallConv> for LaneOrder {
269+
fn from(call_conv: isa::CallConv) -> Self {
270+
match call_conv {
271+
isa::CallConv::Tail => LaneOrder::LittleEndian,
272+
_ => LaneOrder::BigEndian,
273+
}
274+
}
275+
}
276+
267277
/// S390x-specific ABI behavior. This struct just serves as an implementation
268278
/// point for the trait; it is never actually instantiated.
269279
pub struct S390xMachineDeps;
@@ -1001,8 +1011,8 @@ impl S390xMachineDeps {
10011011
pub fn gen_tail_epilogue(
10021012
frame_layout: &FrameLayout,
10031013
callee_pop_size: u32,
1004-
target_reg: Option<&mut Reg>,
1005-
) -> SmallVec<[Inst; 16]> {
1014+
dest: &CallInstDest,
1015+
) -> (SmallVec<[Inst; 16]>, Option<Reg>) {
10061016
let mut insts = SmallVec::new();
10071017
let call_conv = isa::CallConv::Tail;
10081018

@@ -1012,19 +1022,122 @@ impl S390xMachineDeps {
10121022
// If the tail call target is in a callee-saved GPR, we need to move it
10131023
// to %r1 (as the only available temp register) before restoring GPRs
10141024
// (but after restoring FPRs, which might clobber %r1).
1015-
if let Some(reg) = target_reg {
1016-
if is_reg_saved_in_prologue(call_conv, reg.to_real_reg().unwrap()) {
1025+
let temp_dest = match dest {
1026+
CallInstDest::Indirect { reg }
1027+
if is_reg_saved_in_prologue(call_conv, reg.to_real_reg().unwrap()) =>
1028+
{
10171029
insts.push(Inst::Mov64 {
10181030
rd: writable_gpr(1),
10191031
rm: *reg,
10201032
});
1021-
*reg = gpr(1);
1033+
Some(gpr(1))
10221034
}
1023-
}
1035+
_ => None,
1036+
};
10241037

10251038
// Restore GPRs (including SP).
10261039
insts.extend(gen_restore_gprs(call_conv, frame_layout, callee_pop_size));
10271040

1041+
(insts, temp_dest)
1042+
}
1043+
1044+
/// Emit loads for any stack-carried return values using the call
1045+
/// info and allocations. In addition, emit lane swaps for all
1046+
/// vector-types return values if needed.
1047+
pub fn gen_retval_loads(info: &CallInfo<CallInstDest>) -> SmallInstVec<Inst> {
1048+
let mut insts = SmallVec::new();
1049+
1050+
// Helper routine to lane-swap a register if needed.
1051+
let lane_swap_if_needed = |insts: &mut SmallInstVec<Inst>, vreg, ty: Type| {
1052+
if LaneOrder::from(info.caller_conv) != LaneOrder::from(info.callee_conv) {
1053+
if ty.is_vector() {
1054+
if ty.lane_count() >= 2 {
1055+
insts.push(Inst::VecPermuteDWImm {
1056+
rd: vreg,
1057+
rn: vreg.to_reg(),
1058+
rm: vreg.to_reg(),
1059+
idx1: 1,
1060+
idx2: 0,
1061+
});
1062+
}
1063+
if ty.lane_count() >= 4 {
1064+
insts.push(Inst::VecShiftRR {
1065+
shift_op: VecShiftOp::RotL64x2,
1066+
rd: vreg,
1067+
rn: vreg.to_reg(),
1068+
shift_imm: 32,
1069+
shift_reg: zero_reg(),
1070+
});
1071+
}
1072+
if ty.lane_count() >= 8 {
1073+
insts.push(Inst::VecShiftRR {
1074+
shift_op: VecShiftOp::RotL32x4,
1075+
rd: vreg,
1076+
rn: vreg.to_reg(),
1077+
shift_imm: 16,
1078+
shift_reg: zero_reg(),
1079+
});
1080+
}
1081+
if ty.lane_count() >= 16 {
1082+
insts.push(Inst::VecShiftRR {
1083+
shift_op: VecShiftOp::RotL16x8,
1084+
rd: vreg,
1085+
rn: vreg.to_reg(),
1086+
shift_imm: 8,
1087+
shift_reg: zero_reg(),
1088+
});
1089+
}
1090+
}
1091+
}
1092+
};
1093+
1094+
// Helper routine to allocate a temp register for ty.
1095+
let temp_reg = |ty| match Inst::rc_for_type(ty).unwrap() {
1096+
(&[RegClass::Int], _) => writable_gpr(0),
1097+
(&[RegClass::Float], _) => writable_vr(1),
1098+
_ => unreachable!(),
1099+
};
1100+
1101+
// Do a first pass over the return locations to handle copies that
1102+
// need temp registers. These need to be done before regular stack
1103+
// loads in case the destination of a load happens to be our temp
1104+
// register. (The temp registers by choice are distinct from all
1105+
// real return registers, which we verify here again.)
1106+
for CallRetPair { vreg, location } in &info.defs {
1107+
match location {
1108+
RetLocation::Reg(preg, ty) => {
1109+
debug_assert!(*preg != temp_reg(*ty).to_reg());
1110+
}
1111+
RetLocation::Stack(amode, ty) => {
1112+
if let Some(spillslot) = vreg.to_reg().to_spillslot() {
1113+
let temp = temp_reg(*ty);
1114+
insts.push(Inst::gen_load(temp, (*amode).into(), *ty));
1115+
lane_swap_if_needed(&mut insts, temp, *ty);
1116+
insts.push(Inst::gen_store(
1117+
MemArg::SpillOffset {
1118+
off: 8 * (spillslot.index() as i64),
1119+
},
1120+
temp.to_reg(),
1121+
Inst::canonical_type_for_rc(temp.to_reg().class()),
1122+
));
1123+
}
1124+
}
1125+
}
1126+
}
1127+
// Now handle all remaining return locations.
1128+
for CallRetPair { vreg, location } in &info.defs {
1129+
match location {
1130+
RetLocation::Reg(preg, ty) => {
1131+
lane_swap_if_needed(&mut insts, Writable::from_reg(*preg), *ty);
1132+
}
1133+
RetLocation::Stack(amode, ty) => {
1134+
if vreg.to_reg().to_spillslot().is_none() {
1135+
insts.push(Inst::gen_load(*vreg, (*amode).into(), *ty));
1136+
lane_swap_if_needed(&mut insts, *vreg, *ty);
1137+
}
1138+
}
1139+
}
1140+
}
10281141
insts
10291142
}
10301143
}

0 commit comments

Comments
 (0)