Skip to content

Commit 73d4f89

Browse files
committed
cranelift: Move blockparams to critical edge blocks
This lets us stop allocating temporary VRegs for critical edges that have block parameters. That makes the register allocation problem a little smaller, and also allows reusing `lower_branch_blockparam_args` for all block parameters. Fixes bytecodealliance#7639 and unblocks bytecodealliance/regalloc2#170.
1 parent 27c1c1d commit 73d4f89

File tree

6 files changed

+170
-172
lines changed

6 files changed

+170
-172
lines changed

cranelift/codegen/src/machinst/lower.rs

Lines changed: 57 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -950,33 +950,46 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
950950
let loc = self.srcloc(branch);
951951
self.finish_ir_inst(loc);
952952
// Add block param outputs for current block.
953-
self.lower_branch_blockparam_args(bindex);
953+
match self.vcode.block_order().succ_indices(bindex).1 {
954+
&[succ] => self.lower_branch_blockparam_args(branch, 0, succ),
955+
succs => {
956+
// If there are multiple edges, either the destination has
957+
// multiple predecessors and there is a split critical edge
958+
// block that can hold these block params, or it has one
959+
// predecessor and doesn't need block params.
960+
debug_assert!(succs
961+
.iter()
962+
.zip(self.f.dfg.insts[branch].branch_destination(&self.f.dfg.jump_tables))
963+
.all(|(succ, block_call)| {
964+
matches!(
965+
self.vcode.block_order().lowered_order()[succ.index()],
966+
LoweredBlock::CriticalEdge { .. }
967+
) || block_call.args_slice(&self.f.dfg.value_lists).is_empty()
968+
}));
969+
let succs: SmallVec<[BlockIndex; 2]> = SmallVec::from_slice(succs);
970+
for succ in succs {
971+
self.vcode.add_succ(succ, &[]);
972+
}
973+
}
974+
}
954975
Ok(())
955976
}
956977

957-
fn lower_branch_blockparam_args(&mut self, block: BlockIndex) {
958-
// TODO: why not make `block_order` public?
959-
for succ_idx in 0..self.vcode.block_order().succ_indices(block).1.len() {
960-
// Avoid immutable borrow by explicitly indexing.
961-
let (opt_inst, succs) = self.vcode.block_order().succ_indices(block);
962-
let inst = opt_inst.expect("lower_branch_blockparam_args called on a critical edge!");
963-
let succ = succs[succ_idx];
964-
965-
// The use of `succ_idx` to index `branch_destination` is valid on the assumption that
966-
// the traversal order defined in `visit_block_succs` mirrors the order returned by
967-
// `branch_destination`. If that assumption is violated, the branch targets returned
968-
// here will not match the clif.
969-
let branches = self.f.dfg.insts[inst].branch_destination(&self.f.dfg.jump_tables);
970-
let branch_args = branches[succ_idx].args_slice(&self.f.dfg.value_lists);
971-
972-
let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
973-
for &arg in branch_args {
974-
let arg = self.f.dfg.resolve_aliases(arg);
975-
let regs = self.put_value_in_regs(arg);
976-
branch_arg_vregs.extend_from_slice(regs.regs());
977-
}
978-
self.vcode.add_succ(succ, &branch_arg_vregs[..]);
978+
fn lower_branch_blockparam_args(&mut self, inst: Inst, succ_idx: usize, succ: BlockIndex) {
979+
// The use of `succ_idx` to index `branch_destination` is valid on the assumption that
980+
// the traversal order defined in `visit_block_succs` mirrors the order returned by
981+
// `branch_destination`. If that assumption is violated, the branch targets returned
982+
// here will not match the clif.
983+
let branches = self.f.dfg.insts[inst].branch_destination(&self.f.dfg.jump_tables);
984+
let branch_args = branches[succ_idx].args_slice(&self.f.dfg.value_lists);
985+
986+
let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
987+
for &arg in branch_args {
988+
let arg = self.f.dfg.resolve_aliases(arg);
989+
let regs = self.put_value_in_regs(arg);
990+
branch_arg_vregs.extend_from_slice(regs.regs());
979991
}
992+
self.vcode.add_succ(succ, &branch_arg_vregs[..]);
980993
}
981994

982995
fn collect_branches_and_targets(
@@ -1028,44 +1041,29 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
10281041
// `lower_clif_block()` for rationale).
10291042

10301043
// End branches.
1031-
if let Some(bb) = lb.orig_block() {
1032-
if let Some(branch) = self.collect_branches_and_targets(bindex, bb, &mut targets) {
1033-
self.lower_clif_branches(backend, bindex, bb, branch, &targets)?;
1034-
self.finish_ir_inst(self.srcloc(branch));
1035-
}
1036-
} else {
1037-
// If no orig block, this must be a pure edge block;
1038-
// get the successor and emit a jump. Add block params
1039-
// according to the one successor, and pass them
1040-
// through; note that the successor must have an
1041-
// original block.
1042-
let (_, succs) = self.vcode.block_order().succ_indices(bindex);
1043-
let succ = succs[0];
1044-
1045-
let orig_succ = lowered_order[succ.index()];
1046-
let orig_succ = orig_succ
1047-
.orig_block()
1048-
.expect("Edge block succ must be body block");
1049-
1050-
let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
1051-
for ty in self.f.dfg.block_param_types(orig_succ) {
1052-
let regs = self.vregs.alloc(ty)?;
1053-
for &reg in regs.regs() {
1054-
branch_arg_vregs.push(reg);
1055-
let vreg = reg.to_virtual_reg().unwrap();
1056-
self.vcode.add_block_param(vreg);
1044+
match lb {
1045+
&LoweredBlock::Orig { block: bb } => {
1046+
if let Some(branch) =
1047+
self.collect_branches_and_targets(bindex, bb, &mut targets)
1048+
{
1049+
self.lower_clif_branches(backend, bindex, bb, branch, &targets)?;
1050+
self.finish_ir_inst(self.srcloc(branch));
10571051
}
1058-
}
1059-
self.vcode.add_succ(succ, &branch_arg_vregs[..]);
10601052

1061-
self.emit(I::gen_jump(MachLabel::from_block(succ)));
1062-
self.finish_ir_inst(Default::default());
1063-
}
1064-
1065-
// Original block body.
1066-
if let Some(bb) = lb.orig_block() {
1067-
self.lower_clif_block(backend, bb, ctrl_plane)?;
1068-
self.emit_value_label_markers_for_block_args(bb);
1053+
// Original block body.
1054+
self.lower_clif_block(backend, bb, ctrl_plane)?;
1055+
self.emit_value_label_markers_for_block_args(bb);
1056+
}
1057+
&LoweredBlock::CriticalEdge { pred, succ_idx, .. } => {
1058+
// Emit a jump to the successor, placing the block params
1059+
// that the predecessor was going to pass along here.
1060+
let (_, succs) = self.vcode.block_order().succ_indices(bindex);
1061+
let succ = succs[0];
1062+
let branch = self.f.layout.last_inst(pred).unwrap();
1063+
self.lower_branch_blockparam_args(branch, succ_idx as usize, succ);
1064+
self.emit(I::gen_jump(MachLabel::from_block(succ)));
1065+
self.finish_ir_inst(Default::default());
1066+
}
10691067
}
10701068

10711069
if bindex.index() == 0 {

cranelift/filetests/filetests/isa/aarch64/cold-blocks.clif

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ block2:
1616

1717
; VCode:
1818
; block0:
19-
; mov w5, w0
20-
; cbnz x5, label1 ; b label2
19+
; mov w4, w0
20+
; cbnz x4, label1 ; b label2
2121
; block1:
2222
; b label3
2323
; block2:
@@ -28,8 +28,8 @@ block2:
2828
;
2929
; Disassembled:
3030
; block0: ; offset 0x0
31-
; mov w5, w0
32-
; cbnz x5, #0xc
31+
; mov w4, w0
32+
; cbnz x4, #0xc
3333
; block1: ; offset 0x8
3434
; mov w0, #0x61
3535
; block2: ; offset 0xc
@@ -49,8 +49,8 @@ block2 cold:
4949

5050
; VCode:
5151
; block0:
52-
; mov w5, w0
53-
; cbnz x5, label1 ; b label2
52+
; mov w4, w0
53+
; cbnz x4, label1 ; b label2
5454
; block1:
5555
; b label3
5656
; block3:
@@ -61,8 +61,8 @@ block2 cold:
6161
;
6262
; Disassembled:
6363
; block0: ; offset 0x0
64-
; mov w5, w0
65-
; cbz x5, #0xc
64+
; mov w4, w0
65+
; cbz x4, #0xc
6666
; block1: ; offset 0x8
6767
; ret
6868
; block2: ; offset 0xc

cranelift/filetests/filetests/isa/riscv64/bitops-float.clif

Lines changed: 28 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,52 +22,52 @@ block1(v4: f32):
2222
; VCode:
2323
; block0:
2424
; li a0,0
25-
; li a1,0
25+
; li a5,0
26+
; fmv.w.x fa1,a5
27+
; fmv.x.w a4,fa1
28+
; not a1,a4
29+
; fmv.w.x fa2,a1
30+
; fmv.x.w a3,fa2
31+
; fmv.x.w a5,fa2
32+
; or a1,a3,a5
2633
; fmv.w.x fa3,a1
27-
; fmv.x.w a1,fa3
28-
; not a2,a1
29-
; fmv.w.x fa4,a2
30-
; fmv.x.w a5,fa4
31-
; fmv.x.w a1,fa4
32-
; or a3,a5,a1
33-
; fmv.w.x fa4,a3
34-
; br_table a0,[MachLabel(1),MachLabel(2)]##tmp1=a1,tmp2=a2
34+
; br_table a0,[MachLabel(1),MachLabel(2)]##tmp1=a5,tmp2=a1
3535
; block1:
3636
; j label3
3737
; block2:
38-
; fmv.d fa4,fa3
38+
; fmv.d fa3,fa1
3939
; j label3
4040
; block3:
4141
; ret
4242
;
4343
; Disassembled:
4444
; block0: ; offset 0x0
4545
; mv a0, zero
46-
; mv a1, zero
46+
; mv a5, zero
47+
; fmv.w.x fa1, a5
48+
; fmv.x.w a4, fa1
49+
; not a1, a4
50+
; fmv.w.x fa2, a1
51+
; fmv.x.w a3, fa2
52+
; fmv.x.w a5, fa2
53+
; or a1, a3, a5
4754
; fmv.w.x fa3, a1
48-
; fmv.x.w a1, fa3
49-
; not a2, a1
50-
; fmv.w.x fa4, a2
51-
; fmv.x.w a5, fa4
52-
; fmv.x.w a1, fa4
53-
; or a3, a5, a1
54-
; fmv.w.x fa4, a3
5555
; slli t6, a0, 0x20
5656
; srli t6, t6, 0x20
57-
; addi a2, zero, 1
58-
; bltu t6, a2, 0xc
59-
; auipc a2, 0
60-
; jalr zero, a2, 0x28
57+
; addi a1, zero, 1
58+
; bltu t6, a1, 0xc
6159
; auipc a1, 0
62-
; slli a2, t6, 3
63-
; add a1, a1, a2
64-
; jalr zero, a1, 0x10
65-
; auipc a2, 0
66-
; jalr zero, a2, 0xc
60+
; jalr zero, a1, 0x28
61+
; auipc a5, 0
62+
; slli a1, t6, 3
63+
; add a5, a5, a1
64+
; jalr zero, a5, 0x10
65+
; auipc a1, 0
66+
; jalr zero, a1, 0xc
6767
; block1: ; offset 0x58
6868
; j 8
6969
; block2: ; offset 0x5c
70-
; fmv.d fa4, fa3
70+
; fmv.d fa3, fa1
7171
; block3: ; offset 0x60
7272
; ret
7373

cranelift/filetests/filetests/isa/riscv64/cold-blocks.clif

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ block2:
1616

1717
; VCode:
1818
; block0:
19-
; sext.w a5,a0
20-
; bne a5,zero,taken(label1),not_taken(label2)
19+
; sext.w a4,a0
20+
; bne a4,zero,taken(label1),not_taken(label2)
2121
; block1:
2222
; j label3
2323
; block2:
@@ -28,8 +28,8 @@ block2:
2828
;
2929
; Disassembled:
3030
; block0: ; offset 0x0
31-
; sext.w a5, a0
32-
; bnez a5, 8
31+
; sext.w a4, a0
32+
; bnez a4, 8
3333
; block1: ; offset 0x8
3434
; addi a0, zero, 0x61
3535
; block2: ; offset 0xc
@@ -49,8 +49,8 @@ block2 cold:
4949

5050
; VCode:
5151
; block0:
52-
; sext.w a5,a0
53-
; bne a5,zero,taken(label1),not_taken(label2)
52+
; sext.w a4,a0
53+
; bne a4,zero,taken(label1),not_taken(label2)
5454
; block1:
5555
; j label3
5656
; block3:
@@ -61,8 +61,8 @@ block2 cold:
6161
;
6262
; Disassembled:
6363
; block0: ; offset 0x0
64-
; sext.w a5, a0
65-
; beqz a5, 8
64+
; sext.w a4, a0
65+
; beqz a4, 8
6666
; block1: ; offset 0x8
6767
; ret
6868
; block2: ; offset 0xc

0 commit comments

Comments
 (0)