Skip to content

Commit 9fbb8ff

Browse files
fitzgen, bongjunj
authored and committed
Cranelift: introduce a function inliner (bytecodealliance#11210)
* Cranelift: introduce a function inliner

This commit adds "inlining as a library" to Cranelift; it does _not_ provide a complete, off-the-shelf inlining solution. Cranelift's compilation context is per-function and does not encompass the full call graph. It does not know which functions are hot and which are cold, which have been marked the equivalent of `#[inline(always)]` versus `#[inline(never)]`, etc. Only the Cranelift user can understand these aspects of the full compilation pipeline, and these things can be very different between (say) Wasmtime and `cg_clif`.

Therefore, this infrastructure does not attempt to define heuristics for when inlining a particular call is likely beneficial. This module only provides hooks for the Cranelift user to tell Cranelift whether a given call should be inlined or not, and the mechanics to inline a callee into a particular call site when the user directs Cranelift to do so.

This commit also creates a new kind of filetest that will always inline calls to functions that have already been defined in the file. This lets us exercise the inliner in filetests.

Fixes bytecodealliance#4127

* Address review feedback

* Require callee bodies are pre-legalized
1 parent 80b5e63 commit 9fbb8ff

23 files changed

+2841
-10
lines changed

cranelift/codegen/src/context.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ use crate::dominator_tree::DominatorTree;
1414
use crate::dominator_tree::DominatorTreePreorder;
1515
use crate::egraph::EgraphPass;
1616
use crate::flowgraph::ControlFlowGraph;
17+
use crate::inline::{Inline, do_inlining};
1718
use crate::ir::Function;
1819
use crate::isa::TargetIsa;
1920
use crate::legalizer::simple_legalize;
@@ -193,6 +194,13 @@ impl Context {
193194
Ok(())
194195
}
195196

197+
/// Perform function call inlining.
198+
///
199+
/// Returns `true` if any function call was inlined, `false` otherwise.
200+
pub fn inline(&mut self, inliner: impl Inline) -> CodegenResult<bool> {
201+
do_inlining(&mut self.func, inliner)
202+
}
203+
196204
/// Compile the function,
197205
///
198206
/// Run the function through all the passes necessary to generate

cranelift/codegen/src/inline.rs

Lines changed: 1392 additions & 0 deletions
Large diffs are not rendered by default.

cranelift/codegen/src/ir/dfg.rs

Lines changed: 43 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,24 @@ impl Blocks {
6565
self.0.len()
6666
}
6767

68+
/// Reserves capacity for at least `additional` more elements to be
69+
/// inserted.
70+
pub fn reserve(&mut self, additional: usize) {
71+
self.0.reserve(additional);
72+
}
73+
6874
/// Returns `true` if the given block reference is valid.
6975
pub fn is_valid(&self, block: Block) -> bool {
7076
self.0.is_valid(block)
7177
}
78+
79+
/// Iterate over all blocks, regardless of whether a block is actually inserted
80+
/// in the layout or not.
81+
///
82+
/// Iterates in creation order, not layout order.
83+
pub fn iter(&self) -> impl Iterator<Item = Block> {
84+
self.0.keys()
85+
}
7286
}
7387

7488
impl Index<Block> for Blocks {
@@ -107,13 +121,6 @@ pub struct DataFlowGraph {
107121
results: SecondaryMap<Inst, ValueList>,
108122

109123
/// User-defined stack maps.
110-
///
111-
/// Not to be confused with the stack maps that `regalloc2` produces. These
112-
/// are defined by the user in `cranelift-frontend`. These will eventually
113-
/// replace the stack maps support in `regalloc2`, but in the name of
114-
/// incrementalism and avoiding gigantic PRs that completely overhaul
115-
/// Cranelift and Wasmtime at the same time, we are allowing them to live in
116-
/// parallel for the time being.
117124
user_stack_maps: alloc::collections::BTreeMap<Inst, UserStackMapEntryVec>,
118125

119126
/// basic blocks in the function and their parameters.
@@ -337,6 +344,11 @@ impl DataFlowGraph {
337344
self.values.push(data.into())
338345
}
339346

347+
/// The number of values defined in this DFG.
348+
pub fn len_values(&self) -> usize {
349+
self.values.len()
350+
}
351+
340352
/// Get an iterator over all values.
341353
pub fn values<'a>(&'a self) -> Values<'a> {
342354
Values {
@@ -599,6 +611,30 @@ impl DataFlowGraph {
599611
assert!(opcode.is_safepoint());
600612
self.user_stack_maps.entry(inst).or_default().push(entry);
601613
}
614+
615+
/// Append multiple stack map entries for the given call instruction.
616+
///
617+
/// # Panics
618+
///
619+
/// Panics if the given instruction is not a (non-tail) call instruction.
620+
pub fn append_user_stack_map_entries(
621+
&mut self,
622+
inst: Inst,
623+
entries: impl IntoIterator<Item = UserStackMapEntry>,
624+
) {
625+
for entry in entries {
626+
self.append_user_stack_map_entry(inst, entry);
627+
}
628+
}
629+
630+
/// Take the stack map entries for a given instruction, leaving the
631+
/// instruction without stack maps.
632+
pub(crate) fn take_user_stack_map_entries(
633+
&mut self,
634+
inst: Inst,
635+
) -> Option<UserStackMapEntryVec> {
636+
self.user_stack_maps.remove(&inst)
637+
}
602638
}
603639

604640
/// Where did a value come from?

cranelift/codegen/src/ir/exception_table.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,15 @@ impl ExceptionTableData {
106106
DisplayExceptionTable { table: self, pool }
107107
}
108108

109+
/// Deep-clone this exception table.
110+
pub fn deep_clone(&self, pool: &mut ValueListPool) -> Self {
111+
Self {
112+
targets: self.targets.iter().map(|b| b.deep_clone(pool)).collect(),
113+
tags: self.tags.clone(),
114+
sig: self.sig,
115+
}
116+
}
117+
109118
/// Get the default target for the non-exceptional return case.
110119
pub fn normal_return(&self) -> &BlockCall {
111120
self.targets.last().unwrap()
@@ -140,6 +149,18 @@ impl ExceptionTableData {
140149
.zip(self.targets.iter_mut())
141150
}
142151

152+
/// The number of catch edges in this exception table.
153+
pub fn len_catches(&self) -> usize {
154+
self.tags.len()
155+
}
156+
157+
/// Get the `index`th catch edge from this table.
158+
pub fn get_catch(&self, index: usize) -> Option<(Option<ExceptionTag>, &BlockCall)> {
159+
let tag = self.tags.get(index)?.expand();
160+
let target = &self.targets[index];
161+
Some((tag, target))
162+
}
163+
143164
/// Get all branch targets.
144165
pub fn all_branches(&self) -> &[BlockCall] {
145166
&self.targets[..]
@@ -156,11 +177,34 @@ impl ExceptionTableData {
156177
self.sig
157178
}
158179

180+
/// Get a mutable handle to this exception table's signature.
181+
pub(crate) fn signature_mut(&mut self) -> &mut SigRef {
182+
&mut self.sig
183+
}
184+
159185
/// Clears all entries in this exception table, but leaves the function signature.
160186
pub fn clear(&mut self) {
161187
self.tags.clear();
162188
self.targets.clear();
163189
}
190+
191+
/// Push a catch target onto this exception table.
192+
///
193+
/// # Panics
194+
///
195+
/// Panics if this exception table has been cleared.
196+
pub fn push_catch(&mut self, tag: Option<ExceptionTag>, block_call: BlockCall) {
197+
assert_eq!(
198+
self.tags.len() + 1,
199+
self.targets.len(),
200+
"cannot push onto an exception table that has been cleared"
201+
);
202+
203+
self.tags.push(tag.into());
204+
205+
let target_index = self.targets.len() - 1;
206+
self.targets.insert(target_index, block_call);
207+
}
164208
}
165209

166210
/// A wrapper for the context required to display a

cranelift/codegen/src/ir/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ pub use crate::ir::stackslot::{
6868
};
6969
pub use crate::ir::trapcode::TrapCode;
7070
pub use crate::ir::types::Type;
71+
pub(crate) use crate::ir::user_stack_maps::UserStackMapEntryVec;
7172
pub use crate::ir::user_stack_maps::{UserStackMap, UserStackMapEntry};
7273

7374
use crate::entity::{PrimaryMap, SecondaryMap, entity_impl};

cranelift/codegen/src/lib.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,9 @@ extern crate alloc;
2121
extern crate std;
2222

2323
#[cfg(not(feature = "std"))]
24-
use hashbrown::{HashMap, hash_map};
24+
use hashbrown::{HashMap, HashSet, hash_map};
2525
#[cfg(feature = "std")]
26-
use std::collections::{HashMap, hash_map};
26+
use std::collections::{HashMap, HashSet, hash_map};
2727

2828
pub use crate::context::Context;
2929
pub use crate::value_label::{LabelValueLoc, ValueLabelsRanges, ValueLocRange};
@@ -50,6 +50,7 @@ pub mod data_value;
5050
pub mod dbg;
5151
pub mod dominator_tree;
5252
pub mod flowgraph;
53+
pub mod inline;
5354
pub mod ir;
5455
pub mod isa;
5556
pub mod loop_analysis;

cranelift/filetests/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ rust-version.workspace = true
1414
workspace = true
1515

1616
[dependencies]
17-
cranelift-codegen = { workspace = true, features = ["disas"] }
17+
cranelift-codegen = { workspace = true, features = ["disas", "timing"] }
1818
cranelift-frontend = { workspace = true }
1919
cranelift-interpreter = { workspace = true }
2020
cranelift-native = { workspace = true }
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
test inline precise-output
2+
target x86_64
3+
4+
function %f0(i32, i32) -> i32 {
5+
block0(v0: i32, v1: i32):
6+
v2 = iadd v0, v1
7+
return v2
8+
}
9+
10+
; (no functions inlined into %f0)
11+
12+
function %f1() -> i32 {
13+
fn0 = %f0(i32, i32) -> i32
14+
block0():
15+
v0 = iconst.i32 10
16+
v1 = call fn0(v0, v0)
17+
return v1
18+
}
19+
20+
; function %f1() -> i32 fast {
21+
; sig0 = (i32, i32) -> i32 fast
22+
; fn0 = %f0 sig0
23+
;
24+
; block0:
25+
; v0 = iconst.i32 10
26+
; jump block1
27+
;
28+
; block1:
29+
; v3 = iadd.i32 v0, v0 ; v0 = 10, v0 = 10
30+
; jump block2(v3)
31+
;
32+
; block2(v2: i32):
33+
; v1 -> v2
34+
; return v1
35+
; }
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
test inline precise-output
2+
target x86_64
3+
4+
function %f0(i32, i32) -> i32 tail {
5+
fn0 = %whatever(i32, i32) -> i32 tail
6+
block0(v0: i32, v1: i32):
7+
br_table v0, block3, [block1(v1), block2]
8+
block1(v2: i32):
9+
return v2
10+
block2:
11+
v3 = iconst.i32 0
12+
return v3
13+
block3:
14+
return v0
15+
}
16+
17+
; (no functions inlined into %f0)
18+
19+
function %f1() -> i32 tail {
20+
fn0 = %f0(i32, i32) -> i32 tail
21+
block0:
22+
v0 = iconst.i32 42
23+
v1 = call fn0(v0, v0)
24+
v2 = iconst.i32 1
25+
v3 = iadd v1, v2
26+
return v3
27+
}
28+
29+
; function %f1() -> i32 tail {
30+
; sig0 = (i32, i32) -> i32 tail
31+
; sig1 = (i32, i32) -> i32 tail
32+
; fn0 = %f0 sig0
33+
; fn1 = %whatever sig1
34+
;
35+
; block0:
36+
; v0 = iconst.i32 42
37+
; jump block1
38+
;
39+
; block1:
40+
; br_table v0, block4, [block2(v0), block3] ; v0 = 42, v0 = 42
41+
;
42+
; block2(v4: i32):
43+
; jump block5(v4)
44+
;
45+
; block3:
46+
; v6 = iconst.i32 0
47+
; jump block5(v6) ; v6 = 0
48+
;
49+
; block4:
50+
; jump block5(v0) ; v0 = 42
51+
;
52+
; block5(v5: i32):
53+
; v1 -> v5
54+
; v2 = iconst.i32 1
55+
; v3 = iadd v1, v2 ; v2 = 1
56+
; return v3
57+
; }
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
test inline precise-output
2+
target x86_64
3+
4+
function %f0(i32, i32) -> i32 {
5+
block0(v0: i32, v1: i32):
6+
v2 = iconst.i32 0
7+
jump block1(v2, v2)
8+
9+
block1(v3: i32, v4: i32):
10+
v5 = icmp eq v1, v4
11+
brif v4, block3, block2
12+
13+
block2:
14+
v6 = iadd v3, v0
15+
v7 = iconst.i32 1
16+
v8 = iadd v4, v7
17+
jump block1(v6, v8)
18+
19+
block3:
20+
return v3
21+
}
22+
23+
; (no functions inlined into %f0)
24+
25+
function %f1() -> i32 {
26+
fn0 = %f0(i32, i32) -> i32
27+
block0:
28+
v0 = iconst.i32 42
29+
v1 = iconst.i32 10
30+
v2 = call fn0(v0, v1)
31+
return v2
32+
}
33+
34+
; function %f1() -> i32 fast {
35+
; sig0 = (i32, i32) -> i32 fast
36+
; fn0 = %f0 sig0
37+
;
38+
; block0:
39+
; v0 = iconst.i32 42
40+
; v1 = iconst.i32 10
41+
; jump block1
42+
;
43+
; block1:
44+
; v6 = iconst.i32 0
45+
; jump block2(v6, v6) ; v6 = 0, v6 = 0
46+
;
47+
; block2(v3: i32, v4: i32):
48+
; v7 = icmp.i32 eq v1, v4 ; v1 = 10
49+
; brif v4, block4, block3
50+
;
51+
; block3:
52+
; v8 = iadd.i32 v3, v0 ; v0 = 42
53+
; v9 = iconst.i32 1
54+
; v10 = iadd.i32 v4, v9 ; v9 = 1
55+
; jump block2(v8, v10)
56+
;
57+
; block4:
58+
; jump block5(v3)
59+
;
60+
; block5(v5: i32):
61+
; v2 -> v5
62+
; return v2
63+
; }

0 commit comments

Comments
 (0)