Skip to content

Commit 44e4919

Browse files
authored
Cranelift: remove block params on critical-edge blocks. (#10485)
When a block has a terminator branch that targets two or more other blocks at the CLIF level, and any of these blocks have two or more precessors, the edge is a "critical edge" and we split it (insert a new empty block) so that the register allocator has a place to put moves that happen only on that edge. Otherwise, there is no location that works: in the predecessor, code runs no matter which outgoing edge we take; and in the successor, code runs no matter which incoming edge we came from. Currently, when we generate these critical-edge blocks, we insert exactly one instruction: an unconditional branch. We wire up the blockparam dataflow by (i) adding block parameters to the critical-edge block with the same signature as the original target, and (ii) adding all of these arguments to the unconditional branch. In other words, we maintain the original block signature throughout. This is fine and correct, but it has two downsides. The first is a minor loss in compile-time efficiency (more SSA values and block-params to process). The second, more interesting, is that it hinders future work with certain kinds of branches that may define values *on edges*. In particular, this approach prevents exception-handling support: a `try_call` instruction that acts as a terminator branch (with normal-return and exceptional out-edges) defines normal-return values as block-call arguments that are usable on the normal-return edge. Some of these normal-return values may be defined by loads from a return-value area. These loads need somewhere to go; they can't go "after the terminator" (then it wouldn't be a terminator), so they go in an edge block; as a result, the block-call for the normal-return needs to use its arguments only in the unconditional branch out of the edge block, not in the initial branch to the edge block. This PR alters the critical-edge blockparam handling to have no block-call args on the branch into the edge block, and use the original values (not the newly defined edge-block blockparams) in the block-call out of the edge block. This will allow these values to be possibly defined in the edge block rather than in the predecessor (the block with the original terminator). This has no functional change today other than some perturbation of regalloc decisions and a possibly slight compile-time speedup.
1 parent 2de55cc commit 44e4919

File tree

10 files changed

+303
-203
lines changed

10 files changed

+303
-203
lines changed

cranelift/codegen/src/machinst/blockorder.rs

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -132,7 +132,6 @@ impl LoweredBlock {
132132
}
133133

134134
/// The associated out-edge successor, if this is a critical edge.
135-
#[cfg(test)]
136135
pub fn out_edge(&self) -> Option<Block> {
137136
match self {
138137
&LoweredBlock::CriticalEdge { succ, .. } => Some(succ),

cranelift/codegen/src/machinst/lower.rs

Lines changed: 67 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -872,7 +872,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
872872
self.vcode.end_bb();
873873
}
874874

875-
fn lower_clif_branches<B: LowerBackend<MInst = I>>(
875+
fn lower_clif_branch<B: LowerBackend<MInst = I>>(
876876
&mut self,
877877
backend: &B,
878878
// Lowered block index:
@@ -883,7 +883,7 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
883883
targets: &[MachLabel],
884884
) -> CodegenResult<()> {
885885
trace!(
886-
"lower_clif_branches: block {} branch {:?} targets {:?}",
886+
"lower_clif_branch: block {} branch {:?} targets {:?}",
887887
block,
888888
branch,
889889
targets,
@@ -909,31 +909,17 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
909909
}
910910

911911
fn lower_branch_blockparam_args(&mut self, block: BlockIndex) {
912+
let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
913+
912914
// TODO: why not make `block_order` public?
913915
for succ_idx in 0..self.vcode.block_order().succ_indices(block).1.len() {
914-
// Avoid immutable borrow by explicitly indexing.
915-
let (opt_inst, succs) = self.vcode.block_order().succ_indices(block);
916-
let inst = opt_inst.expect("lower_branch_blockparam_args called on a critical edge!");
917-
let succ = succs[succ_idx];
918-
919-
// The use of `succ_idx` to index `branch_destination` is valid on the assumption that
920-
// the traversal order defined in `visit_block_succs` mirrors the order returned by
921-
// `branch_destination`. If that assumption is violated, the branch targets returned
922-
// here will not match the clif.
923-
let branches = self.f.dfg.insts[inst].branch_destination(&self.f.dfg.jump_tables);
924-
let branch_args = branches[succ_idx].args_slice(&self.f.dfg.value_lists);
925-
926-
let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
927-
for &arg in branch_args {
928-
debug_assert!(self.f.dfg.value_is_real(arg));
929-
let regs = self.put_value_in_regs(arg);
930-
branch_arg_vregs.extend_from_slice(regs.regs());
931-
}
932-
self.vcode.add_succ(succ, &branch_arg_vregs[..]);
916+
branch_arg_vregs.clear();
917+
let (succ, args) = self.collect_block_call(block, succ_idx, &mut branch_arg_vregs);
918+
self.vcode.add_succ(succ, args);
933919
}
934920
}
935921

936-
fn collect_branches_and_targets(
922+
fn collect_branch_and_targets(
937923
&self,
938924
bindex: BlockIndex,
939925
_bb: Block,
@@ -945,6 +931,56 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
945931
opt_inst
946932
}
947933

934+
/// Collect the outgoing block-call arguments for a given edge out
935+
/// of a lowered block.
936+
fn collect_block_call<'a>(
937+
&mut self,
938+
block: BlockIndex,
939+
succ_idx: usize,
940+
buffer: &'a mut SmallVec<[Reg; 16]>,
941+
) -> (BlockIndex, &'a [Reg]) {
942+
let block_order = self.vcode.block_order();
943+
let (_, succs) = block_order.succ_indices(block);
944+
let succ = succs[succ_idx];
945+
let this_lb = block_order.lowered_order()[block.index()];
946+
let succ_lb = block_order.lowered_order()[succ.index()];
947+
948+
let (branch_inst, succ_idx) = match (this_lb, succ_lb) {
949+
(_, LoweredBlock::CriticalEdge { .. }) => {
950+
// The successor is a split-critical-edge block. In this
951+
// case, this block-call has no arguments, and the
952+
// arguments go on the critical edge block's unconditional
953+
// branch instead.
954+
return (succ, &[]);
955+
}
956+
(LoweredBlock::CriticalEdge { pred, succ_idx, .. }, _) => {
957+
// This is a split-critical-edge block. In this case, our
958+
// block-call has the arguments that in the CLIF appear in
959+
// the predecessor's branch to this edge.
960+
let branch_inst = self.f.layout.last_inst(pred).unwrap();
961+
(branch_inst, succ_idx as usize)
962+
}
963+
964+
(this, _) => {
965+
let block = this.orig_block().unwrap();
966+
// Ordinary block, with an ordinary block as
967+
// successor. Take the arguments from the branch.
968+
let branch_inst = self.f.layout.last_inst(block).unwrap();
969+
(branch_inst, succ_idx)
970+
}
971+
};
972+
973+
let block_call =
974+
self.f.dfg.insts[branch_inst].branch_destination(&self.f.dfg.jump_tables)[succ_idx];
975+
let args = block_call.args_slice(&self.f.dfg.value_lists);
976+
for &arg in args {
977+
debug_assert!(self.f.dfg.value_is_real(arg));
978+
let regs = self.put_value_in_regs(arg);
979+
buffer.extend_from_slice(regs.regs());
980+
}
981+
(succ, &buffer[..])
982+
}
983+
948984
/// Lower the function.
949985
pub fn lower<B: LowerBackend<MInst = I>>(
950986
mut self,
@@ -981,39 +1017,22 @@ impl<'func, I: VCodeInst> Lower<'func, I> {
9811017
// Lower the block body in reverse order (see comment in
9821018
// `lower_clif_block()` for rationale).
9831019

984-
// End branches.
1020+
// End branch.
9851021
if let Some(bb) = lb.orig_block() {
986-
if let Some(branch) = self.collect_branches_and_targets(bindex, bb, &mut targets) {
987-
self.lower_clif_branches(backend, bindex, bb, branch, &targets)?;
1022+
if let Some(branch) = self.collect_branch_and_targets(bindex, bb, &mut targets) {
1023+
self.lower_clif_branch(backend, bindex, bb, branch, &targets)?;
9881024
self.finish_ir_inst(self.srcloc(branch));
9891025
}
9901026
} else {
9911027
// If no orig block, this must be a pure edge block;
992-
// get the successor and emit a jump. Add block params
993-
// according to the one successor, and pass them
994-
// through; note that the successor must have an
995-
// original block.
996-
let (_, succs) = self.vcode.block_order().succ_indices(bindex);
997-
let succ = succs[0];
998-
999-
let orig_succ = lowered_order[succ.index()];
1000-
let orig_succ = orig_succ
1001-
.orig_block()
1002-
.expect("Edge block succ must be body block");
1003-
1004-
let mut branch_arg_vregs: SmallVec<[Reg; 16]> = smallvec![];
1005-
for ty in self.f.dfg.block_param_types(orig_succ) {
1006-
let regs = self.vregs.alloc(ty)?;
1007-
for &reg in regs.regs() {
1008-
branch_arg_vregs.push(reg);
1009-
let vreg = reg.to_virtual_reg().unwrap();
1010-
self.vcode.add_block_param(vreg);
1011-
}
1012-
}
1013-
self.vcode.add_succ(succ, &branch_arg_vregs[..]);
1014-
1028+
// get the successor and emit a jump. This block has
1029+
// no block params; and this jump's block-call args
1030+
// will be filled in by
1031+
// `lower_branch_blockparam_args`.
1032+
let succ = self.vcode.block_order().succ_indices(bindex).1[0];
10151033
self.emit(I::gen_jump(MachLabel::from_block(succ)));
10161034
self.finish_ir_inst(Default::default());
1035+
self.lower_branch_blockparam_args(bindex);
10171036
}
10181037

10191038
// Original block body.

cranelift/filetests/filetests/isa/aarch64/cold-blocks.clif

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ block2:
1616

1717
; VCode:
1818
; block0:
19-
; mov w5, w0
20-
; cbnz x5, label1 ; b label2
19+
; mov w4, w0
20+
; cbnz x4, label1 ; b label2
2121
; block1:
2222
; b label3
2323
; block2:
@@ -28,8 +28,8 @@ block2:
2828
;
2929
; Disassembled:
3030
; block0: ; offset 0x0
31-
; mov w5, w0
32-
; cbnz x5, #0xc
31+
; mov w4, w0
32+
; cbnz x4, #0xc
3333
; block1: ; offset 0x8
3434
; mov w0, #0x61
3535
; block2: ; offset 0xc
@@ -49,8 +49,8 @@ block2 cold:
4949

5050
; VCode:
5151
; block0:
52-
; mov w5, w0
53-
; cbnz x5, label1 ; b label2
52+
; mov w4, w0
53+
; cbnz x4, label1 ; b label2
5454
; block1:
5555
; b label3
5656
; block3:
@@ -61,8 +61,8 @@ block2 cold:
6161
;
6262
; Disassembled:
6363
; block0: ; offset 0x0
64-
; mov w5, w0
65-
; cbz x5, #0xc
64+
; mov w4, w0
65+
; cbz x4, #0xc
6666
; block1: ; offset 0x8
6767
; ret
6868
; block2: ; offset 0xc

cranelift/filetests/filetests/isa/riscv64/bitops-float.clif

Lines changed: 29 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,50 +22,50 @@ block1(v4: f32):
2222
; VCode:
2323
; block0:
2424
; li a0,0
25-
; fmv.w.x fa1,zero
26-
; fmv.x.w a5,fa1
27-
; not a1,a5
28-
; fmv.w.x fa3,a1
29-
; fmv.x.w a4,fa3
30-
; fmv.x.w a1,fa3
31-
; or a2,a4,a1
32-
; fmv.w.x fa2,a2
33-
; br_table a0,[MachLabel(1),MachLabel(2)]##tmp1=a2,tmp2=a1
25+
; fmv.w.x fa5,zero
26+
; fmv.x.w a3,fa5
27+
; not a5,a3
28+
; fmv.w.x fa1,a5
29+
; fmv.x.w a2,fa1
30+
; fmv.x.w a4,fa1
31+
; or a1,a2,a4
32+
; fmv.w.x fa2,a1
33+
; br_table a0,[MachLabel(1),MachLabel(2)]##tmp1=a4,tmp2=a5
3434
; block1:
3535
; j label3
3636
; block2:
37-
; fmv.d fa2,fa1
37+
; fmv.d fa2,fa5
3838
; j label3
3939
; block3:
4040
; ret
4141
;
4242
; Disassembled:
4343
; block0: ; offset 0x0
4444
; mv a0, zero
45-
; fmv.w.x fa1, zero
46-
; fmv.x.w a5, fa1
47-
; not a1, a5
48-
; fmv.w.x fa3, a1
49-
; fmv.x.w a4, fa3
50-
; fmv.x.w a1, fa3
51-
; or a2, a4, a1
52-
; fmv.w.x fa2, a2
45+
; fmv.w.x fa5, zero
46+
; fmv.x.w a3, fa5
47+
; not a5, a3
48+
; fmv.w.x fa1, a5
49+
; fmv.x.w a2, fa1
50+
; fmv.x.w a4, fa1
51+
; or a1, a2, a4
52+
; fmv.w.x fa2, a1
5353
; slli t6, a0, 0x20
5454
; srli t6, t6, 0x20
55-
; addi a1, zero, 1
56-
; bltu t6, a1, 0xc
57-
; auipc a1, 0
58-
; jalr zero, a1, 0x28
59-
; auipc a2, 0
60-
; slli a1, t6, 3
61-
; add a2, a2, a1
62-
; jalr zero, a2, 0x10
63-
; auipc a1, 0
64-
; jalr zero, a1, 0xc
55+
; addi a5, zero, 1
56+
; bltu t6, a5, 0xc
57+
; auipc a5, 0
58+
; jalr zero, a5, 0x28
59+
; auipc a4, 0
60+
; slli a5, t6, 3
61+
; add a4, a4, a5
62+
; jalr zero, a4, 0x10
63+
; auipc a5, 0
64+
; jalr zero, a5, 0xc
6565
; block1: ; offset 0x54
6666
; j 8
6767
; block2: ; offset 0x58
68-
; fmv.d fa2, fa1
68+
; fmv.d fa2, fa5
6969
; block3: ; offset 0x5c
7070
; ret
7171

cranelift/filetests/filetests/isa/riscv64/cold-blocks.clif

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,8 +16,8 @@ block2:
1616

1717
; VCode:
1818
; block0:
19-
; sext.w a5,a0
20-
; bne a5,zero,taken(label1),not_taken(label2)
19+
; sext.w a4,a0
20+
; bne a4,zero,taken(label1),not_taken(label2)
2121
; block1:
2222
; j label3
2323
; block2:
@@ -28,8 +28,8 @@ block2:
2828
;
2929
; Disassembled:
3030
; block0: ; offset 0x0
31-
; sext.w a5, a0
32-
; bnez a5, 8
31+
; sext.w a4, a0
32+
; bnez a4, 8
3333
; block1: ; offset 0x8
3434
; addi a0, zero, 0x61
3535
; block2: ; offset 0xc
@@ -49,8 +49,8 @@ block2 cold:
4949

5050
; VCode:
5151
; block0:
52-
; sext.w a5,a0
53-
; bne a5,zero,taken(label1),not_taken(label2)
52+
; sext.w a4,a0
53+
; bne a4,zero,taken(label1),not_taken(label2)
5454
; block1:
5555
; j label3
5656
; block3:
@@ -61,8 +61,8 @@ block2 cold:
6161
;
6262
; Disassembled:
6363
; block0: ; offset 0x0
64-
; sext.w a5, a0
65-
; beqz a5, 8
64+
; sext.w a4, a0
65+
; beqz a4, 8
6666
; block1: ; offset 0x8
6767
; ret
6868
; block2: ; offset 0xc

0 commit comments

Comments
 (0)